diff -ruN linux-org/arch/arm/common/it8152.c linux/arch/arm/common/it8152.c --- linux-org/arch/arm/common/it8152.c 2022-03-25 09:55:36.273474651 +0100 +++ linux/arch/arm/common/it8152.c 2022-03-25 10:15:23.333003086 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -75,6 +76,7 @@ .irq_ack = it8152_mask_irq, .irq_mask = it8152_mask_irq, .irq_unmask = it8152_unmask_irq, + .flags = IRQCHIP_PIPELINE_SAFE, }; void it8152_init_irq(void) @@ -124,21 +126,21 @@ bits_pd &= ((1 << IT8152_PD_IRQ_COUNT) - 1); while (bits_pd) { i = __ffs(bits_pd); - generic_handle_irq(IT8152_PD_IRQ(i)); + ipipe_handle_demuxed_irq(IT8152_PD_IRQ(i)); bits_pd &= ~(1 << i); } bits_lp &= ((1 << IT8152_LP_IRQ_COUNT) - 1); while (bits_lp) { i = __ffs(bits_lp); - generic_handle_irq(IT8152_LP_IRQ(i)); + ipipe_handle_demuxed_irq(IT8152_LP_IRQ(i)); bits_lp &= ~(1 << i); } bits_ld &= ((1 << IT8152_LD_IRQ_COUNT) - 1); while (bits_ld) { i = __ffs(bits_ld); - generic_handle_irq(IT8152_LD_IRQ(i)); + ipipe_handle_demuxed_irq(IT8152_LD_IRQ(i)); bits_ld &= ~(1 << i); } } diff -ruN linux-org/arch/arm/include/asm/arch_timer.h linux/arch/arm/include/asm/arch_timer.h --- linux-org/arch/arm/include/asm/arch_timer.h 2022-03-25 09:55:36.289474592 +0100 +++ linux/arch/arm/include/asm/arch_timer.h 2022-03-25 10:15:23.333003086 +0100 @@ -106,6 +106,10 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) { +#ifdef CONFIG_IPIPE + /* Enable access to user-space (may not be needed) */ + cntkctl |= ARCH_TIMER_USR_PCT_ACCESS_EN; +#endif asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl)); } diff -ruN linux-org/arch/arm/include/asm/assembler.h linux/arch/arm/include/asm/assembler.h --- linux-org/arch/arm/include/asm/assembler.h 2022-03-25 09:55:36.293474576 +0100 +++ linux/arch/arm/include/asm/assembler.h 2022-03-25 10:15:23.333003086 +0100 @@ -100,6 +100,18 @@ .macro enable_irq_notrace cpsie i .endm + + .macro disable_irq_cond +#ifdef CONFIG_IPIPE + cpsid i +#endif /* CONFIG_IPIPE */ + .endm + + .macro enable_irq_cond +#ifdef CONFIG_IPIPE + cpsie i +#endif /* CONFIG_IPIPE */ + .endm #else .macro disable_irq_notrace msr cpsr_c, #PSR_I_BIT | SVC_MODE @@ -108,10 +120,22 @@ .macro enable_irq_notrace msr cpsr_c, #SVC_MODE .endm + + .macro disable_irq_cond +#ifdef CONFIG_IPIPE + msr cpsr_c, #PSR_I_BIT | SVC_MODE +#endif /* CONFIG_IPIPE */ + .endm + + .macro enable_irq_cond +#ifdef CONFIG_IPIPE + msr cpsr_c, #SVC_MODE +#endif /* CONFIG_IPIPE */ + .endm #endif .macro asm_trace_hardirqs_off, save=1 -#if defined(CONFIG_TRACE_IRQFLAGS) +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_IPIPE) .if \save stmdb sp!, {r0-r3, ip, lr} .endif @@ -123,7 +147,7 @@ .endm .macro asm_trace_hardirqs_on, cond=al, save=1 -#if defined(CONFIG_TRACE_IRQFLAGS) +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_IPIPE) /* * actually the registers should be pushed and pop'd conditionally, but * after bl the flags are certainly clobbered diff -ruN linux-org/arch/arm/include/asm/atomic.h linux/arch/arm/include/asm/atomic.h --- linux-org/arch/arm/include/asm/atomic.h 2022-03-25 09:55:36.293474576 +0100 +++ linux/arch/arm/include/asm/atomic.h 2022-03-25 10:15:23.333003086 +0100 @@ -168,9 +168,9 @@ { \ unsigned long flags; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ v->counter c_op i; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ } \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -179,10 +179,10 @@ unsigned long flags; \ int val; \ \ - raw_local_irq_save(flags); \ + flags = 
hard_local_irq_save(); \ v->counter c_op i; \ val = v->counter; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ \ return val; \ } @@ -193,10 +193,10 @@ unsigned long flags; \ int val; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ val = v->counter; \ v->counter c_op i; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ \ return val; \ } @@ -206,11 +206,11 @@ int ret; unsigned long flags; - raw_local_irq_save(flags); + flags = hard_local_irq_save(); ret = v->counter; if (likely(ret == old)) v->counter = new; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return ret; } diff -ruN linux-org/arch/arm/include/asm/bitops.h linux/arch/arm/include/asm/bitops.h --- linux-org/arch/arm/include/asm/bitops.h 2022-03-25 09:55:36.293474576 +0100 +++ linux/arch/arm/include/asm/bitops.h 2022-03-25 10:15:23.333003086 +0100 @@ -40,9 +40,9 @@ p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); *p |= mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline void ____atomic_clear_bit(unsigned int bit, volatile unsigned long *p) @@ -52,9 +52,9 @@ p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); *p &= ~mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline void ____atomic_change_bit(unsigned int bit, volatile unsigned long *p) @@ -64,9 +64,9 @@ p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); *p ^= mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); } static inline int @@ -78,10 +78,10 @@ p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); res = *p; *p = res | mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return (res & mask) != 0; } @@ -95,10 +95,10 @@ p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); res = *p; *p = res & ~mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return (res & mask) != 0; } @@ -112,10 +112,10 @@ p += BIT_WORD(bit); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); res = *p; *p = res ^ mask; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return (res & mask) != 0; } diff -ruN linux-org/arch/arm/include/asm/cmpxchg.h linux/arch/arm/include/asm/cmpxchg.h --- linux-org/arch/arm/include/asm/cmpxchg.h 2022-03-25 09:55:36.293474576 +0100 +++ linux/arch/arm/include/asm/cmpxchg.h 2022-03-25 10:15:23.333003086 +0100 @@ -77,17 +77,17 @@ #error SMP is not supported on this platform #endif case 1: - raw_local_irq_save(flags); + flags = hard_local_irq_save(); ret = *(volatile unsigned char *)ptr; *(volatile unsigned char *)ptr = x; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); break; case 4: - raw_local_irq_save(flags); + flags = hard_local_irq_save(); ret = *(volatile unsigned long *)ptr; *(volatile unsigned long *)ptr = x; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); break; #else case 1: diff -ruN linux-org/arch/arm/include/asm/efi.h linux/arch/arm/include/asm/efi.h --- linux-org/arch/arm/include/asm/efi.h 2022-03-25 09:55:36.293474576 +0100 +++ linux/arch/arm/include/asm/efi.h 2022-03-25 10:15:23.333003086 +0100 @@ -41,7 +41,7 @@ static inline void efi_set_pgd(struct mm_struct *mm) { - check_and_switch_context(mm, NULL); + check_and_switch_context(mm, NULL, true); } void efi_virtmap_load(void); diff -ruN linux-org/arch/arm/include/asm/entry-macro-multi.S 
linux/arch/arm/include/asm/entry-macro-multi.S --- linux-org/arch/arm/include/asm/entry-macro-multi.S 2022-03-24 17:11:29.579130152 +0100 +++ linux/arch/arm/include/asm/entry-macro-multi.S 2022-03-25 10:15:23.333003086 +0100 @@ -12,7 +12,11 @@ @ routine called with r0 = irq number, r1 = struct pt_regs * @ badrne lr, 1b +#ifdef CONFIG_IPIPE + bne __ipipe_grab_irq +#else bne asm_do_IRQ +#endif #ifdef CONFIG_SMP /* @@ -25,8 +29,12 @@ ALT_UP_B(9997f) movne r1, sp badrne lr, 1b +#ifdef CONFIG_IPIPE + bne __ipipe_grab_ipi +#else bne do_IPI #endif +#endif 9997: .endm diff -ruN linux-org/arch/arm/include/asm/hw_irq.h linux/arch/arm/include/asm/hw_irq.h --- linux-org/arch/arm/include/asm/hw_irq.h 2022-03-24 17:11:29.583130144 +0100 +++ linux/arch/arm/include/asm/hw_irq.h 2022-03-25 10:15:23.333003086 +0100 @@ -14,4 +14,8 @@ #define ARCH_IRQ_INIT_FLAGS (IRQ_NOREQUEST | IRQ_NOPROBE) +#define IPIPE_NR_ROOT_IRQS 1024 + +#define IPIPE_NR_XIRQS IPIPE_NR_ROOT_IRQS + #endif diff -ruN linux-org/arch/arm/include/asm/ipipe_base.h linux/arch/arm/include/asm/ipipe_base.h --- linux-org/arch/arm/include/asm/ipipe_base.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/include/asm/ipipe_base.h 2022-03-25 10:15:23.337003070 +0100 @@ -0,0 +1,90 @@ +/* -*- linux-c -*- + * arch/arm/include/asm/ipipe_base.h + * + * Copyright (C) 2007 Gilles Chanteperdrix. + * Copyright (C) 2010 Philippe Gerum (SMP port). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __ASM_ARM_IPIPE_BASE_H +#define __ASM_ARM_IPIPE_BASE_H + +#include + +#ifdef CONFIG_IPIPE + +#ifdef CONFIG_SMP + +extern unsigned __ipipe_first_ipi; + +#define IPIPE_CRITICAL_IPI __ipipe_first_ipi +#define IPIPE_HRTIMER_IPI (IPIPE_CRITICAL_IPI + 1) +#define IPIPE_RESCHEDULE_IPI (IPIPE_CRITICAL_IPI + 2) +#define IPIPE_SERVICE_VNMI (IPIPE_CRITICAL_IPI + 3) + +#define IPIPE_LAST_IPI IPIPE_SERVICE_VNMI + +#define hard_smp_processor_id() raw_smp_processor_id() + +#ifdef CONFIG_SMP_ON_UP +unsigned __ipipe_processor_id(void); + +#define ipipe_processor_id() \ + ({ \ + register unsigned int cpunum __asm__ ("r0"); \ + register unsigned int r1 __asm__ ("r1"); \ + register unsigned int r2 __asm__ ("r2"); \ + register unsigned int r3 __asm__ ("r3"); \ + register unsigned int ip __asm__ ("ip"); \ + register unsigned int lr __asm__ ("lr"); \ + __asm__ __volatile__ ("\n" \ + "1: bl __ipipe_processor_id\n" \ + " .pushsection \".alt.smp.init\", \"a\"\n" \ + " .long 1b\n" \ + " mov %0, #0\n" \ + " .popsection" \ + : "=r"(cpunum), "=r"(r1), "=r"(r2), "=r"(r3), \ + "=r"(ip), "=r"(lr) \ + : /* */ : "cc"); \ + cpunum; \ + }) +#else /* !SMP_ON_UP */ +#define ipipe_processor_id() raw_smp_processor_id() +#endif /* !SMP_ON_UP */ + +#define IPIPE_ARCH_HAVE_VIRQ_IPI + +#else /* !CONFIG_SMP */ +#define ipipe_processor_id() (0) +#endif /* !CONFIG_IPIPE */ + +/* ARM traps */ +#define IPIPE_TRAP_ACCESS 0 /* Data or instruction access exception */ +#define IPIPE_TRAP_SECTION 1 /* Section fault */ +#define IPIPE_TRAP_DABT 2 /* Generic data abort */ +#define IPIPE_TRAP_UNKNOWN 3 /* Unknown exception */ +#define IPIPE_TRAP_BREAK 4 /* Instruction breakpoint */ +#define IPIPE_TRAP_FPU 5 /* Floating point exception */ +#define IPIPE_TRAP_VFP 6 /* VFP floating point exception */ +#define IPIPE_TRAP_UNDEFINSTR 7 /* Undefined instruction */ +#define IPIPE_TRAP_ALIGNMENT 8 /* Unaligned access exception */ +#define IPIPE_TRAP_MAYDAY 9 /* Internal recovery trap */ +#define IPIPE_NR_FAULTS 10 + +#endif /* CONFIG_IPIPE */ + +#endif /* __ASM_ARM_IPIPE_BASE_H */ diff -ruN linux-org/arch/arm/include/asm/ipipe.h linux/arch/arm/include/asm/ipipe.h --- linux-org/arch/arm/include/asm/ipipe.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/include/asm/ipipe.h 2022-03-25 10:15:23.337003070 +0100 @@ -0,0 +1,282 @@ +/* -*- linux-c -*- + * arch/arm/include/asm/ipipe.h + * + * Copyright (C) 2002-2005 Philippe Gerum. + * Copyright (C) 2005 Stelian Pop. + * Copyright (C) 2006-2008 Gilles Chanteperdrix. + * Copyright (C) 2010 Philippe Gerum (SMP port). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __ARM_IPIPE_H +#define __ARM_IPIPE_H + +#include + +#ifdef CONFIG_IPIPE + +#define BROKEN_BUILTIN_RETURN_ADDRESS +#undef __BUILTIN_RETURN_ADDRESS0 +#undef __BUILTIN_RETURN_ADDRESS1 +#ifdef CONFIG_FRAME_POINTER +#define __BUILTIN_RETURN_ADDRESS0 arm_return_addr(0) +#define __BUILTIN_RETURN_ADDRESS1 arm_return_addr(1) +extern unsigned long arm_return_addr(int level); +#else +#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) +#define __BUILTIN_RETURN_ADDRESS1 (0) +#endif + +#include +#include + +#define IPIPE_CORE_RELEASE 1 + +struct ipipe_domain; +struct timekeeper; + +#define IPIPE_TSC_TYPE_NONE 0 +#define IPIPE_TSC_TYPE_FREERUNNING 1 +#define IPIPE_TSC_TYPE_DECREMENTER 2 +#define IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN 3 +#define IPIPE_TSC_TYPE_FREERUNNING_TWICE 4 +#define IPIPE_TSC_TYPE_FREERUNNING_ARCH 5 + +/* tscinfo, exported to user-space */ +struct __ipipe_tscinfo { + unsigned type; + unsigned freq; + unsigned long counter_vaddr; + union { + struct { + unsigned long counter_paddr; + unsigned long long mask; + }; + struct { + unsigned *counter; /* Hw counter physical address */ + unsigned long long mask; /* Significant bits in the hw counter. */ + unsigned long long *tsc; /* 64 bits tsc value. */ + } fr; + struct { + unsigned *counter; /* Hw counter physical address */ + unsigned long long mask; /* Significant bits in the hw counter. */ + unsigned *last_cnt; /* Counter value when updating + tsc value. */ + unsigned long long *tsc; /* 64 bits tsc value. */ + } dec; + } u; + unsigned int (*refresh_freq)(void); +}; + +struct ipipe_arch_sysinfo { + struct __ipipe_tscinfo tsc; +}; + + +/* arch specific stuff */ +extern char __ipipe_tsc_area[]; +void __ipipe_mach_get_tscinfo(struct __ipipe_tscinfo *info); + +#ifdef CONFIG_IPIPE_ARM_KUSER_TSC +unsigned long long __ipipe_tsc_get(void) __attribute__((long_call)); +void __ipipe_tsc_register(struct __ipipe_tscinfo *info); +void __ipipe_tsc_update(void); +void __ipipe_update_vsyscall(struct timekeeper *tk); +extern unsigned long __ipipe_kuser_tsc_freq; +#define __ipipe_hrclock_freq __ipipe_kuser_tsc_freq +#else /* ! generic tsc */ +unsigned long long __ipipe_mach_get_tsc(void); +#define __ipipe_tsc_get() __ipipe_mach_get_tsc() +static inline void __ipipe_update_vsyscall(struct timekeeper *tk) {} +#ifndef __ipipe_hrclock_freq +extern unsigned long __ipipe_hrtimer_freq; +#define __ipipe_hrclock_freq __ipipe_hrtimer_freq +#endif /* !__ipipe_mach_hrclock_freq */ +#endif /* ! 
generic tsc */ + +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL +extern void (*__ipipe_mach_hrtimer_debug)(unsigned irq); +#endif /* CONFIG_IPIPE_DEBUG_INTERNAL */ + +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + +#define ipipe_mm_switch_protect(flags) \ + do { \ + (void)(flags); \ + } while(0) + +#define ipipe_mm_switch_unprotect(flags) \ + do { \ + (void)(flags); \ + } while(0) + +#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + +#define ipipe_mm_switch_protect(flags) \ + flags = hard_cond_local_irq_save() + +#define ipipe_mm_switch_unprotect(flags) \ + hard_cond_local_irq_restore(flags) + +#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + +#define ipipe_get_active_mm() (__this_cpu_read(ipipe_percpu.active_mm)) + +#define ipipe_read_tsc(t) do { t = __ipipe_tsc_get(); } while(0) +#define __ipipe_read_timebase() __ipipe_tsc_get() + +#define ipipe_tsc2ns(t) \ +({ \ + unsigned long long delta = (t)*1000; \ + do_div(delta, __ipipe_hrclock_freq / 1000000 + 1); \ + (unsigned long)delta; \ +}) +#define ipipe_tsc2us(t) \ +({ \ + unsigned long long delta = (t); \ + do_div(delta, __ipipe_hrclock_freq / 1000000 + 1); \ + (unsigned long)delta; \ +}) + +static inline const char *ipipe_clock_name(void) +{ + return "ipipe_tsc"; +} + +/* Private interface -- Internal use only */ + +#define __ipipe_enable_irq(irq) enable_irq(irq) +#define __ipipe_disable_irq(irq) disable_irq(irq) + +static inline void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq) +{ } + +static inline void __ipipe_disable_irqdesc(struct ipipe_domain *ipd, unsigned irq) +{ } + +static inline void ipipe_mute_pic(void) +{ } + +static inline void ipipe_unmute_pic(void) +{ } + +#define ipipe_notify_root_preemption() do { } while(0) + +#ifdef CONFIG_SMP +void __ipipe_early_core_setup(void); +void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); +void __ipipe_root_localtimer(unsigned int irq, void *cookie); +void __ipipe_send_vnmi(void (*fn)(void *), cpumask_t cpumask, void *arg); +void __ipipe_do_vnmi(unsigned int irq, void *cookie); +void __ipipe_grab_ipi(unsigned svc, struct pt_regs *regs); +void __ipipe_ipis_alloc(void); +void __ipipe_ipis_request(void); + +static inline void ipipe_handle_multi_ipi(int irq, struct pt_regs *regs) +{ + __ipipe_grab_ipi(irq, regs); +} + +#ifdef CONFIG_SMP_ON_UP +extern struct static_key __ipipe_smp_key; +#define ipipe_smp_p (static_key_true(&__ipipe_smp_key)) +#endif /* SMP_ON_UP */ +#else /* !CONFIG_SMP */ +#define __ipipe_early_core_setup() do { } while(0) +#define __ipipe_hook_critical_ipi(ipd) do { } while(0) +#endif /* !CONFIG_SMP */ +#ifndef __ipipe_mach_init_platform +#define __ipipe_mach_init_platform() do { } while(0) +#endif + +void __ipipe_enable_pipeline(void); + +void __ipipe_do_critical_sync(unsigned irq, void *cookie); + +void __ipipe_grab_irq(int irq, struct pt_regs *regs); + +void __ipipe_exit_irq(struct pt_regs *regs); + +static inline void ipipe_handle_multi_irq(int irq, struct pt_regs *regs) +{ + __ipipe_grab_irq(irq, regs); +} + +static inline unsigned long __ipipe_ffnz(unsigned long ul) +{ + return ffs(ul) - 1; +} + +#define __ipipe_root_tick_p(regs) (!arch_irqs_disabled_flags(regs->ARM_cpsr)) + +#ifdef CONFIG_IRQ_DOMAIN +static inline +int ipipe_handle_domain_irq(struct irq_domain *domain, + unsigned int hwirq, struct pt_regs *regs) +{ + unsigned int irq; + irq = irq_find_mapping(domain, hwirq); + ipipe_handle_multi_irq(irq, regs); + + return 0; +} +#endif /* irq domains */ + +#else /* !CONFIG_IPIPE */ + +#include +#include + +#define __ipipe_tsc_update() do { } while(0) + 
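The ipipe_tsc2ns() and ipipe_tsc2us() macros a few lines above convert a raw timestamp-counter delta by dividing it by the high-resolution clock frequency scaled down to MHz, with a +1 in the divisor so a clock slower than 1 MHz cannot cause a division by zero (ipipe_tsc2ns() also multiplies the tick count by 1000 first). The following is a minimal stand-alone sketch of that arithmetic only, not part of the patch; the 24 MHz figure is a made-up example and ordinary 64-bit division stands in for the kernel's do_div():

#include <stdint.h>
#include <stdio.h>

/* Example only: a hypothetical 24 MHz free-running counter. */
#define EXAMPLE_HRCLOCK_FREQ	24000000UL

static unsigned long example_tsc2ns(uint64_t ticks)
{
	/* Mirrors ipipe_tsc2ns(): ns = ticks * 1000 / (freq_in_MHz + 1) */
	uint64_t delta = ticks * 1000;

	delta /= EXAMPLE_HRCLOCK_FREQ / 1000000 + 1;
	return (unsigned long)delta;
}

int main(void)
{
	/* 24000 ticks at 24 MHz: prints 960000 rather than 1000000, since the +1 makes the divisor 25. */
	printf("%lu ns\n", example_tsc2ns(24000));
	return 0;
}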
+#define hard_smp_processor_id() smp_processor_id()
+
+#define ipipe_mm_switch_protect(flags)		\
+	do {					\
+		(void) (flags);			\
+	} while(0)
+
+#define ipipe_mm_switch_unprotect(flags)	\
+	do {					\
+		(void) (flags);			\
+	} while(0)
+
+static inline void ipipe_handle_multi_irq(int irq, struct pt_regs *regs)
+{
+	handle_IRQ(irq, regs);
+}
+
+#ifdef CONFIG_SMP
+static inline void ipipe_handle_multi_ipi(int irq, struct pt_regs *regs)
+{
+	handle_IPI(irq, regs);
+}
+#endif /* CONFIG_SMP */
+
+static inline
+int ipipe_handle_domain_irq(struct irq_domain *domain,
+			    unsigned int hwirq, struct pt_regs *regs)
+{
+	return handle_domain_irq(domain, hwirq, regs);
+}
+
+struct timekeeper;
+static inline void __ipipe_update_vsyscall(struct timekeeper *tk) {}
+
+#endif /* !CONFIG_IPIPE */
+
+#endif /* !__ARM_IPIPE_H */
diff -ruN linux-org/arch/arm/include/asm/ipipe_hwirq.h linux/arch/arm/include/asm/ipipe_hwirq.h
--- linux-org/arch/arm/include/asm/ipipe_hwirq.h	1970-01-01 01:00:00.000000000 +0100
+++ linux/arch/arm/include/asm/ipipe_hwirq.h	2022-03-25 10:15:23.337003070 +0100
@@ -0,0 +1,250 @@
+/* -*- linux-c -*-
+ * arch/arm/include/asm/ipipe_hwirq.h
+ *
+ * Copyright (C) 2002-2005 Philippe Gerum.
+ * Copyright (C) 2005 Stelian Pop.
+ * Copyright (C) 2006-2008 Gilles Chanteperdrix.
+ * Copyright (C) 2010 Philippe Gerum (SMP port).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */ + +#ifndef _ASM_ARM_IPIPE_HWIRQ_H +#define _ASM_ARM_IPIPE_HWIRQ_H + +#define hard_local_irq_restore_notrace(x) \ + __asm__ __volatile__( \ + "msr cpsr_c, %0 @ hard_local_irq_restore\n" \ + : \ + : "r" (x) \ + : "memory", "cc") + +static inline void hard_local_irq_disable_notrace(void) +{ +#if __LINUX_ARM_ARCH__ >= 6 + __asm__("cpsid i @ __cli" : : : "memory", "cc"); +#else /* linux arch <= 5 */ + unsigned long temp; + __asm__ __volatile__( + "mrs %0, cpsr @ hard_local_irq_disable\n" + "orr %0, %0, #128\n" + "msr cpsr_c, %0" + : "=r" (temp) + : + : "memory", "cc"); +#endif /* linux arch <= 5 */ +} + +static inline void hard_local_irq_enable_notrace(void) +{ +#if __LINUX_ARM_ARCH__ >= 6 + __asm__("cpsie i @ __sti" : : : "memory", "cc"); +#else /* linux arch <= 5 */ + unsigned long temp; + __asm__ __volatile__( + "mrs %0, cpsr @ hard_local_irq_enable\n" + "bic %0, %0, #128\n" + "msr cpsr_c, %0" + : "=r" (temp) + : + : "memory", "cc"); +#endif /* linux arch <= 5 */ +} + +static inline void hard_local_fiq_disable_notrace(void) +{ +#if __LINUX_ARM_ARCH__ >= 6 + __asm__("cpsid f @ __clf" : : : "memory", "cc"); +#else /* linux arch <= 5 */ + unsigned long temp; + __asm__ __volatile__( + "mrs %0, cpsr @ clf\n" + "orr %0, %0, #64\n" + "msr cpsr_c, %0" + : "=r" (temp) + : + : "memory", "cc"); +#endif /* linux arch <= 5 */ +} + +static inline void hard_local_fiq_enable_notrace(void) +{ +#if __LINUX_ARM_ARCH__ >= 6 + __asm__("cpsie f @ __stf" : : : "memory", "cc"); +#else /* linux arch <= 5 */ + unsigned long temp; + __asm__ __volatile__( + "mrs %0, cpsr @ stf\n" + "bic %0, %0, #64\n" + "msr cpsr_c, %0" + : "=r" (temp) + : + : "memory", "cc"); +#endif /* linux arch <= 5 */ +} + +static inline unsigned long hard_local_irq_save_notrace(void) +{ + unsigned long res; +#if __LINUX_ARM_ARCH__ >= 6 + __asm__ __volatile__( + "mrs %0, cpsr @ hard_local_irq_save\n" + "cpsid i" + : "=r" (res) : : "memory", "cc"); +#else /* linux arch <= 5 */ + unsigned long temp; + __asm__ __volatile__( + "mrs %0, cpsr @ hard_local_irq_save\n" + "orr %1, %0, #128\n" + "msr cpsr_c, %1" + : "=r" (res), "=r" (temp) + : + : "memory", "cc"); +#endif /* linux arch <= 5 */ + return res; +} + +#include + +#ifdef CONFIG_IPIPE + +#include + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return (int)((flags) & PSR_I_BIT); +} + +static inline unsigned long hard_local_save_flags(void) +{ + unsigned long flags; + __asm__ __volatile__( + "mrs %0, cpsr @ hard_local_save_flags" + : "=r" (flags) : : "memory", "cc"); + return flags; +} + +#define hard_irqs_disabled_flags(flags) arch_irqs_disabled_flags(flags) + +static inline int hard_irqs_disabled(void) +{ + return hard_irqs_disabled_flags(hard_local_save_flags()); +} + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + +static inline void hard_local_irq_disable(void) +{ + if (!hard_irqs_disabled()) { + hard_local_irq_disable_notrace(); + ipipe_trace_begin(0x80000000); + } +} + +static inline void hard_local_irq_enable(void) +{ + if (hard_irqs_disabled()) { + ipipe_trace_end(0x80000000); + hard_local_irq_enable_notrace(); + } +} + +static inline unsigned long hard_local_irq_save(void) +{ + unsigned long flags; + + flags = hard_local_irq_save_notrace(); + if (!arch_irqs_disabled_flags(flags)) + ipipe_trace_begin(0x80000001); + + return flags; +} + +static inline void hard_local_irq_restore(unsigned long x) +{ + if (!arch_irqs_disabled_flags(x)) + ipipe_trace_end(0x80000001); + + hard_local_irq_restore_notrace(x); +} + +#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ + +#define 
hard_local_irq_disable hard_local_irq_disable_notrace +#define hard_local_irq_enable hard_local_irq_enable_notrace +#define hard_local_irq_save hard_local_irq_save_notrace +#define hard_local_irq_restore hard_local_irq_restore_notrace + +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + +#define arch_local_irq_disable() \ + ({ \ + ipipe_stall_root(); \ + barrier(); \ + }) + +#define arch_local_irq_enable() \ + do { \ + barrier(); \ + ipipe_unstall_root(); \ + } while (0) + +#define local_fiq_enable() hard_local_fiq_enable_notrace() + +#define local_fiq_disable() hard_local_fiq_disable_notrace() + +#define arch_local_irq_restore(flags) \ + do { \ + if (!arch_irqs_disabled_flags(flags)) \ + arch_local_irq_enable(); \ + } while (0) + +#define arch_local_irq_save() \ + ({ \ + unsigned long _flags; \ + _flags = ipipe_test_and_stall_root() << 7; \ + barrier(); \ + _flags; \ + }) + +#define arch_local_save_flags() \ + ({ \ + unsigned long _flags; \ + _flags = ipipe_test_root() << 7; \ + barrier(); \ + _flags; \ + }) + +#define arch_irqs_disabled() ipipe_test_root() +#define hard_irq_disable() hard_local_irq_disable() + +static inline unsigned long arch_mangle_irq_bits(int virt, unsigned long real) +{ + /* Merge virtual and real interrupt mask bits into a single + 32bit word. */ + return (real & ~(1L << 8)) | ((virt != 0) << 8); +} + +static inline int arch_demangle_irq_bits(unsigned long *x) +{ + int virt = (*x & (1 << 8)) != 0; + *x &= ~(1L << 8); + return virt; +} + +#endif /* !CONFIG_IPIPE */ + +#endif /* _ASM_ARM_IPIPE_HWIRQ_H */ diff -ruN linux-org/arch/arm/include/asm/irqflags.h linux/arch/arm/include/asm/irqflags.h --- linux-org/arch/arm/include/asm/irqflags.h 2022-03-24 17:11:29.583130144 +0100 +++ linux/arch/arm/include/asm/irqflags.h 2022-03-25 10:15:23.337003070 +0100 @@ -6,6 +6,10 @@ #include +#include + +#ifndef CONFIG_IPIPE + /* * CPU interrupt mask handling. */ @@ -56,13 +60,6 @@ #define local_fiq_enable() __asm__("cpsie f @ __stf" : : : "memory", "cc") #define local_fiq_disable() __asm__("cpsid f @ __clf" : : : "memory", "cc") -#ifndef CONFIG_CPU_V7M -#define local_abt_enable() __asm__("cpsie a @ __sta" : : : "memory", "cc") -#define local_abt_disable() __asm__("cpsid a @ __cla" : : : "memory", "cc") -#else -#define local_abt_enable() do { } while (0) -#define local_abt_disable() do { } while (0) -#endif #else /* @@ -193,5 +190,15 @@ #include +#endif /* ifndef IPIPE */ + +#ifndef CONFIG_CPU_V7M +#define local_abt_enable() __asm__("cpsie a @ __sta" : : : "memory", "cc") +#define local_abt_disable() __asm__("cpsid a @ __cla" : : : "memory", "cc") +#else +#define local_abt_enable() do { } while (0) +#define local_abt_disable() do { } while (0) +#endif + #endif /* ifdef __KERNEL__ */ #endif /* ifndef __ASM_ARM_IRQFLAGS_H */ diff -ruN linux-org/arch/arm/include/asm/irqflags.h.orig linux/arch/arm/include/asm/irqflags.h.orig --- linux-org/arch/arm/include/asm/irqflags.h.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/include/asm/irqflags.h.orig 2022-03-25 10:09:27.334344106 +0100 @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_ARM_IRQFLAGS_H +#define __ASM_ARM_IRQFLAGS_H + +#ifdef __KERNEL__ + +#include + +/* + * CPU interrupt mask handling. 
+ */ +#ifdef CONFIG_CPU_V7M +#define IRQMASK_REG_NAME_R "primask" +#define IRQMASK_REG_NAME_W "primask" +#define IRQMASK_I_BIT 1 +#else +#define IRQMASK_REG_NAME_R "cpsr" +#define IRQMASK_REG_NAME_W "cpsr_c" +#define IRQMASK_I_BIT PSR_I_BIT +#endif + +#if __LINUX_ARM_ARCH__ >= 6 + +#define arch_local_irq_save arch_local_irq_save +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags; + + asm volatile( + " mrs %0, " IRQMASK_REG_NAME_R " @ arch_local_irq_save\n" + " cpsid i" + : "=r" (flags) : : "memory", "cc"); + return flags; +} + +#define arch_local_irq_enable arch_local_irq_enable +static inline void arch_local_irq_enable(void) +{ + asm volatile( + " cpsie i @ arch_local_irq_enable" + : + : + : "memory", "cc"); +} + +#define arch_local_irq_disable arch_local_irq_disable +static inline void arch_local_irq_disable(void) +{ + asm volatile( + " cpsid i @ arch_local_irq_disable" + : + : + : "memory", "cc"); +} + +#define local_fiq_enable() __asm__("cpsie f @ __stf" : : : "memory", "cc") +#define local_fiq_disable() __asm__("cpsid f @ __clf" : : : "memory", "cc") + +#ifndef CONFIG_CPU_V7M +#define local_abt_enable() __asm__("cpsie a @ __sta" : : : "memory", "cc") +#define local_abt_disable() __asm__("cpsid a @ __cla" : : : "memory", "cc") +#else +#define local_abt_enable() do { } while (0) +#define local_abt_disable() do { } while (0) +#endif +#else + +/* + * Save the current interrupt enable state & disable IRQs + */ +#define arch_local_irq_save arch_local_irq_save +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags, temp; + + asm volatile( + " mrs %0, cpsr @ arch_local_irq_save\n" + " orr %1, %0, #128\n" + " msr cpsr_c, %1" + : "=r" (flags), "=r" (temp) + : + : "memory", "cc"); + return flags; +} + +/* + * Enable IRQs + */ +#define arch_local_irq_enable arch_local_irq_enable +static inline void arch_local_irq_enable(void) +{ + unsigned long temp; + asm volatile( + " mrs %0, cpsr @ arch_local_irq_enable\n" + " bic %0, %0, #128\n" + " msr cpsr_c, %0" + : "=r" (temp) + : + : "memory", "cc"); +} + +/* + * Disable IRQs + */ +#define arch_local_irq_disable arch_local_irq_disable +static inline void arch_local_irq_disable(void) +{ + unsigned long temp; + asm volatile( + " mrs %0, cpsr @ arch_local_irq_disable\n" + " orr %0, %0, #128\n" + " msr cpsr_c, %0" + : "=r" (temp) + : + : "memory", "cc"); +} + +/* + * Enable FIQs + */ +#define local_fiq_enable() \ + ({ \ + unsigned long temp; \ + __asm__ __volatile__( \ + "mrs %0, cpsr @ stf\n" \ +" bic %0, %0, #64\n" \ +" msr cpsr_c, %0" \ + : "=r" (temp) \ + : \ + : "memory", "cc"); \ + }) + +/* + * Disable FIQs + */ +#define local_fiq_disable() \ + ({ \ + unsigned long temp; \ + __asm__ __volatile__( \ + "mrs %0, cpsr @ clf\n" \ +" orr %0, %0, #64\n" \ +" msr cpsr_c, %0" \ + : "=r" (temp) \ + : \ + : "memory", "cc"); \ + }) + +#define local_abt_enable() do { } while (0) +#define local_abt_disable() do { } while (0) +#endif + +/* + * Save the current interrupt enable state. 
+ */ +#define arch_local_save_flags arch_local_save_flags +static inline unsigned long arch_local_save_flags(void) +{ + unsigned long flags; + asm volatile( + " mrs %0, " IRQMASK_REG_NAME_R " @ local_save_flags" + : "=r" (flags) : : "memory", "cc"); + return flags; +} + +/* + * restore saved IRQ state + */ +#define arch_local_irq_restore arch_local_irq_restore +static inline void arch_local_irq_restore(unsigned long flags) +{ + unsigned long temp = 0; + flags &= ~(1 << 6); + asm volatile ( + " mrs %0, cpsr" + : "=r" (temp) + : + : "memory", "cc"); + /* Preserve FIQ bit */ + temp &= (1 << 6); + flags = flags | temp; + asm volatile ( + " msr cpsr_c, %0 @ local_irq_restore" + : + : "r" (flags) + : "memory", "cc"); +} + +#define arch_irqs_disabled_flags arch_irqs_disabled_flags +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return flags & IRQMASK_I_BIT; +} + +#include + +#endif /* ifdef __KERNEL__ */ +#endif /* ifndef __ASM_ARM_IRQFLAGS_H */ diff -ruN linux-org/arch/arm/include/asm/irq.h linux/arch/arm/include/asm/irq.h --- linux-org/arch/arm/include/asm/irq.h 2022-03-25 09:55:36.297474562 +0100 +++ linux/arch/arm/include/asm/irq.h 2022-03-25 10:15:23.337003070 +0100 @@ -7,9 +7,14 @@ #ifndef CONFIG_SPARSE_IRQ #include #else +#if !defined(CONFIG_IPIPE) || defined(CONFIG_IRQ_DOMAIN) #define NR_IRQS NR_IRQS_LEGACY +#else +#define NR_IRQS 512 +#endif #endif + #ifndef irq_canonicalize #define irq_canonicalize(i) (i) #endif @@ -50,4 +55,3 @@ #endif #endif - diff -ruN linux-org/arch/arm/include/asm/mmu_context.h linux/arch/arm/include/asm/mmu_context.h --- linux-org/arch/arm/include/asm/mmu_context.h 2022-03-25 09:55:36.301474546 +0100 +++ linux/arch/arm/include/asm/mmu_context.h 2022-03-25 10:15:23.337003070 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,8 @@ #ifdef CONFIG_CPU_HAS_ASID -void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); +int check_and_switch_context(struct mm_struct *mm, + struct task_struct *tsk, bool may_defer); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -50,13 +52,14 @@ #ifdef CONFIG_MMU -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) +static inline int +check_and_switch_context(struct mm_struct *mm, + struct task_struct *tsk, bool may_defer) { if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) __check_vmalloc_seq(mm); - if (irqs_disabled()) + if (may_defer && irqs_disabled()) { /* * cpu_switch_mm() needs to flush the VIVT caches. To avoid * high interrupt latencies, defer the call and continue @@ -65,10 +68,23 @@ * finish_arch_post_lock_switch() call. 
*/ mm->context.switch_pending = 1; - else + return -EAGAIN; + } else { cpu_switch_mm(mm->pgd, mm); + } + + return 0; } +#ifdef CONFIG_IPIPE +extern void deferred_switch_mm(struct mm_struct *mm); +#else /* !I-pipe */ +static inline void deferred_switch_mm(struct mm_struct *next) +{ + cpu_switch_mm(next->pgd, next); +} +#endif /* !I-pipe */ + #ifndef MODULE #define finish_arch_post_lock_switch \ finish_arch_post_lock_switch @@ -85,8 +101,11 @@ */ preempt_disable(); if (mm->context.switch_pending) { + unsigned long flags; mm->context.switch_pending = 0; - cpu_switch_mm(mm->pgd, mm); + ipipe_mm_switch_protect(flags); + deferred_switch_mm(mm); + ipipe_mm_switch_unprotect(flags); } preempt_enable_no_resched(); } @@ -101,12 +120,8 @@ return 0; } - #endif /* CONFIG_CPU_HAS_ASID */ -#define destroy_context(mm) do { } while(0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) - /* * This is called when "tsk" is about to enter lazy TLB mode. * @@ -127,12 +142,12 @@ * calling the CPU specific function when the mm hasn't * actually changed. */ -static inline void -switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) +static inline int +__do_switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk, bool may_defer) { #ifdef CONFIG_MMU - unsigned int cpu = smp_processor_id(); + const unsigned int cpu = ipipe_processor_id(); /* * __sync_icache_dcache doesn't broadcast the I-cache invalidation, @@ -145,13 +160,60 @@ __flush_icache_all(); if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) { - check_and_switch_context(next, tsk); - if (cache_is_vivt()) + int rc = check_and_switch_context(next, tsk, may_defer); + if (rc < 0) { +#ifdef CONFIG_IPIPE + cpumask_clear_cpu(cpu, mm_cpumask(next)); + return rc; +#endif /* CONFIG_IPIPE */ + } + if (cache_is_vivt() && prev) cpumask_clear_cpu(cpu, mm_cpumask(prev)); } -#endif +#endif /* CONFIG_MMU */ + return 0; +} + +#if defined(CONFIG_IPIPE) && defined(CONFIG_MMU) +extern void __switch_mm_inner(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk); +#else /* !I-pipe || !MMU */ +#define __switch_mm_inner(prev, next, tsk) \ + __do_switch_mm(prev, next, tsk, true) +#endif /* !I-pipe || !MMU */ + +static inline void +ipipe_switch_mm_head(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + __do_switch_mm(prev, next, tsk, false); +} + +static inline void +__switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + __switch_mm_inner(prev, next, tsk); +} + +static inline void +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ +#ifdef CONFIG_MMU + unsigned long flags; + ipipe_mm_switch_protect(flags); + __switch_mm(prev, next, tsk); + ipipe_mm_switch_unprotect(flags); +#endif /* CONFIG_MMU */ } #define deactivate_mm(tsk,mm) do { } while (0) +#define activate_mm(prev,next) __switch_mm(prev, next, NULL) + +static inline void destroy_context(struct mm_struct *mm) +{ +} + #endif diff -ruN linux-org/arch/arm/include/asm/percpu.h linux/arch/arm/include/asm/percpu.h --- linux-org/arch/arm/include/asm/percpu.h 2022-03-25 09:55:36.305474531 +0100 +++ linux/arch/arm/include/asm/percpu.h 2022-03-25 10:15:23.337003070 +0100 @@ -16,11 +16,15 @@ #ifndef _ASM_ARM_PERCPU_H_ #define _ASM_ARM_PERCPU_H_ +#include + /* * Same as asm-generic/percpu.h, except that we store the per cpu offset * in the TPIDRPRW. 
TPIDRPRW only exists on V6K and V7 */ -#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6) +#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6) && \ + (!defined(CONFIG_IPIPE) || \ + (!defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_IPIPE_TRACE))) static inline void set_my_cpu_offset(unsigned long off) { /* Set TPIDRPRW */ @@ -43,6 +47,10 @@ } #define __my_cpu_offset __my_cpu_offset() #else +#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) +#define __my_cpu_offset (per_cpu_offset(ipipe_processor_id())) +#endif /* SMP && IPIPE */ + #define set_my_cpu_offset(x) do {} while(0) #endif /* CONFIG_SMP */ diff -ruN linux-org/arch/arm/include/asm/setup.h linux/arch/arm/include/asm/setup.h --- linux-org/arch/arm/include/asm/setup.h 2022-03-25 09:55:36.305474531 +0100 +++ linux/arch/arm/include/asm/setup.h 2022-03-25 10:15:23.337003070 +0100 @@ -31,4 +31,10 @@ static inline void save_atags(const struct tag *tags) { } #endif +#ifdef CONFIG_IPIPE +void smp_build_cpu_revmap(void); +#else +static inline void smp_build_cpu_revmap(void) { } +#endif + #endif diff -ruN linux-org/arch/arm/include/asm/switch_to.h linux/arch/arm/include/asm/switch_to.h --- linux-org/arch/arm/include/asm/switch_to.h 2022-03-25 09:55:36.305474531 +0100 +++ linux/arch/arm/include/asm/switch_to.h 2022-03-25 10:15:23.341003056 +0100 @@ -23,10 +23,19 @@ */ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info *, struct thread_info *); +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH #define switch_to(prev,next,last) \ do { \ __complete_pending_tlbi(); \ + hard_cond_local_irq_disable(); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + hard_cond_local_irq_enable(); \ } while (0) +#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ +#define switch_to(prev,next,last) \ +do { \ + last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ +} while (0) +#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ #endif /* __ASM_ARM_SWITCH_TO_H */ diff -ruN linux-org/arch/arm/include/asm/thread_info.h linux/arch/arm/include/asm/thread_info.h --- linux-org/arch/arm/include/asm/thread_info.h 2022-03-25 09:55:36.305474531 +0100 +++ linux/arch/arm/include/asm/thread_info.h 2022-03-25 10:15:23.341003056 +0100 @@ -25,6 +25,7 @@ struct task_struct; #include +#include typedef unsigned long mm_segment_t; @@ -65,6 +66,10 @@ #ifdef CONFIG_ARM_THUMBEE unsigned long thumbee_state; /* ThumbEE Handler Base register */ #endif +#ifdef CONFIG_IPIPE + unsigned long ipipe_flags; +#endif + struct ipipe_threadinfo ipipe_data; }; #define INIT_THREAD_INFO(tsk) \ @@ -149,6 +154,8 @@ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 +#define TIF_MMSWITCH_INT 23 /* MMU context switch preempted */ + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -159,6 +166,8 @@ #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) +#define _TIF_MMSWITCH_INT (1 << TIF_MMSWITCH_INT) + /* Checks for any syscall work in entry-common.S */ #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) @@ -169,5 +178,14 @@ #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE) +/* ti->ipipe_flags */ +#define TIP_MAYDAY 0 /* MAYDAY call is pending */ +#define TIP_NOTIFY 1 /* Notify head domain about kernel events */ +#define TIP_HEAD 2 /* Runs in head domain */ + +#define 
_TIP_MAYDAY (1 << TIP_MAYDAY) +#define _TIP_NOTIFY (1 << TIP_NOTIFY) +#define _TIP_HEAD (1 << TIP_HEAD) + #endif /* __KERNEL__ */ #endif /* __ASM_ARM_THREAD_INFO_H */ diff -ruN linux-org/arch/arm/include/asm/uaccess.h linux/arch/arm/include/asm/uaccess.h --- linux-org/arch/arm/include/asm/uaccess.h 2022-03-25 09:55:36.305474531 +0100 +++ linux/arch/arm/include/asm/uaccess.h 2022-03-25 10:15:23.341003056 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -191,7 +192,7 @@ #define get_user(x, p) \ ({ \ - might_fault(); \ + __ipipe_uaccess_might_fault(); \ __get_user_check(x, p); \ }) @@ -271,7 +272,7 @@ unsigned long __gu_val; \ unsigned int __ua_flags; \ __chk_user_ptr(ptr); \ - might_fault(); \ + __ipipe_uaccess_might_fault(); \ __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ case 1: __get_user_asm_byte(__gu_val, __gu_addr, err); break; \ @@ -331,7 +332,7 @@ const __typeof__(*(ptr)) __user *__pu_ptr = (ptr); \ __typeof__(*(ptr)) __pu_val = (x); \ unsigned int __ua_flags; \ - might_fault(); \ + __ipipe_uaccess_might_fault(); \ __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ case 1: __fn(__pu_val, __pu_ptr, __err, 1); break; \ @@ -442,7 +443,6 @@ : "r" (x), "i" (-EFAULT) \ : "cc") - #ifdef CONFIG_MMU extern unsigned long __must_check arm_copy_from_user(void *to, const void __user *from, unsigned long n); diff -ruN linux-org/arch/arm/include/uapi/asm/unistd.h linux/arch/arm/include/uapi/asm/unistd.h --- linux-org/arch/arm/include/uapi/asm/unistd.h 2022-03-25 09:55:36.309474516 +0100 +++ linux/arch/arm/include/uapi/asm/unistd.h 2022-03-25 10:15:23.341003056 +0100 @@ -37,4 +37,10 @@ #define __ARM_NR_usr32 (__ARM_NR_BASE+4) #define __ARM_NR_set_tls (__ARM_NR_BASE+5) +/* + * This SWI is IPIPE private, for dispatching syscalls to the head + * domain. + */ +#define __ARM_NR_ipipe (__ARM_NR_BASE+66) + #endif /* _UAPI__ASM_ARM_UNISTD_H */ diff -ruN linux-org/arch/arm/Kconfig linux/arch/arm/Kconfig --- linux-org/arch/arm/Kconfig 2022-03-25 09:55:36.093475330 +0100 +++ linux/arch/arm/Kconfig 2022-03-25 10:15:23.329003100 +0100 @@ -53,7 +53,7 @@ select HAVE_ARM_SMCCC if CPU_V7 select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CC_STACKPROTECTOR - select HAVE_CONTEXT_TRACKING + select HAVE_CONTEXT_TRACKING if !IPIPE select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG @@ -913,6 +913,14 @@ config PLAT_VERSATILE bool +if IPIPE +config IPIPE_ARM_KUSER_TSC + bool + select GENERIC_TIME_VSYSCALL + select IPIPE_HAVE_HOSTRT if IPIPE + default y if ARM_TIMER_SP804 || ARCH_MXC || ARCH_OMAP +endif + source "arch/arm/firmware/Kconfig" source arch/arm/mm/Kconfig @@ -1483,6 +1491,8 @@ If unsure, leave the default value. 
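The _TIP_* masks introduced in asm/thread_info.h above are plain bit flags over the new per-thread ipipe_flags word. The stand-alone sketch below only illustrates that test/set/clear pattern; struct example_thread_info and the example_* helper are illustrative stand-ins, not kernel API:

#include <stdio.h>

#define TIP_MAYDAY	0	/* MAYDAY call is pending */
#define TIP_NOTIFY	1	/* notify head domain about kernel events */
#define TIP_HEAD	2	/* runs in head domain */

#define _TIP_MAYDAY	(1 << TIP_MAYDAY)
#define _TIP_NOTIFY	(1 << TIP_NOTIFY)
#define _TIP_HEAD	(1 << TIP_HEAD)

struct example_thread_info {
	unsigned long ipipe_flags;
};

static int example_runs_in_head(const struct example_thread_info *ti)
{
	return (ti->ipipe_flags & _TIP_HEAD) != 0;
}

int main(void)
{
	struct example_thread_info ti = { .ipipe_flags = 0 };

	ti.ipipe_flags |= _TIP_HEAD | _TIP_NOTIFY;	/* thread migrates to the head domain */
	ti.ipipe_flags &= ~_TIP_NOTIFY;			/* stop notifying the head domain */
	printf("head: %d\n", example_runs_in_head(&ti));	/* prints: head: 1 */
	return 0;
}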
+source kernel/ipipe/Kconfig + source kernel/Kconfig.preempt config HZ_FIXED @@ -1757,6 +1767,7 @@ config UACCESS_WITH_MEMCPY bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user()" depends on MMU + depends on !IPIPE default y if CPU_FEROCEON help Implement faster copy_to_user and clear_user methods for CPU diff -ruN linux-org/arch/arm/Kconfig.orig linux/arch/arm/Kconfig.orig --- linux-org/arch/arm/Kconfig.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/Kconfig.orig 2022-03-25 10:09:27.162344753 +0100 @@ -0,0 +1,1998 @@ +# SPDX-License-Identifier: GPL-2.0 +config ARM + bool + default y + select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_DEBUG_VIRTUAL if MMU + select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_KEEPINITRD + select ARCH_HAS_KCOV + select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + select ARCH_HAS_PTE_SPECIAL if ARM_LPAE + select ARCH_HAS_PHYS_TO_DMA + select ARCH_HAS_SETUP_DMA_OPS + select ARCH_HAS_SET_MEMORY + select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL + select ARCH_HAS_STRICT_MODULE_RWX if MMU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB || !MMU + select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB || !MMU + select ARCH_HAS_TEARDOWN_DMA_OPS if MMU + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_HAVE_NMI_SAFE_CMPXCHG if CPU_V7 || CPU_V7M || CPU_V6K + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_KEEP_MEMBLOCK + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU + select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_LD_ORPHAN_WARN + select BINFMT_FLAT_ARGVP_ENVP_ON_STACK + select BUILDTIME_TABLE_SORT if MMU + select CLONE_BACKWARDS + select CPU_PM if SUSPEND || CPU_IDLE + select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS + select DMA_DECLARE_COHERENT + select DMA_GLOBAL_POOL if !MMU + select DMA_OPS + select DMA_REMAP if MMU + select EDAC_SUPPORT + select EDAC_ATOMIC_SCRUB + select GENERIC_ALLOCATOR + select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY + select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_IRQ_IPI if SMP + select GENERIC_CPU_AUTOPROBE + select GENERIC_EARLY_IOREMAP + select GENERIC_IDLE_POLL_SETUP + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_LIB_DEVMEM_IS_ALLOWED + select GENERIC_PCI_IOMAP + select GENERIC_SCHED_CLOCK + select GENERIC_SMP_IDLE_THREAD + select HANDLE_DOMAIN_IRQ + select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT + select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL + select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_PFN_VALID + select HAVE_ARCH_SECCOMP + select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT + select HAVE_ARCH_THREAD_STRUCT_WHITELIST + select 
HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE + select HAVE_ARM_SMCCC if CPU_V7 + select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 + select HAVE_CONTEXT_TRACKING + select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL + select HAVE_DMA_CONTIGUOUS if MMU + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU + select HAVE_EXIT_THREAD + select HAVE_FAST_GUP if ARM_LPAE + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG + select HAVE_FUNCTION_TRACER if !XIP_KERNEL + select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GCC_PLUGINS + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) + select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 + select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M + select HAVE_KRETPROBES if HAVE_KPROBES + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI + select HAVE_OPTPROBES if !THUMB2_KERNEL + select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ + select HAVE_STACKPROTECTOR + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_UID16 + select HAVE_VIRT_CPU_ACCOUNTING_GEN + select IRQ_FORCED_THREADING + select MODULES_USE_ELF_REL + select NEED_DMA_MAP_STATE + select OF_EARLY_FLATTREE if OF + select OLD_SIGACTION + select OLD_SIGSUSPEND3 + select PCI_SYSCALL if PCI + select PERF_USE_VMALLOC + select RTC_LIB + select SYS_SUPPORTS_APM_EMULATION + select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M + # Above selects are sorted alphabetically; please add new ones + # according to that. Thanks. + help + The ARM series is a line of low-power-consumption RISC chip designs + licensed by ARM Ltd and targeted at embedded applications and + handhelds such as the Compaq IPAQ. ARM-based PCs are no longer + manufactured, but legacy ARM-based PC hardware remains popular in + Europe. There is an ARM Linux project with a web page at + . + +config ARM_HAS_SG_CHAIN + bool + +config ARM_DMA_USE_IOMMU + bool + select ARM_HAS_SG_CHAIN + select NEED_SG_DMA_LENGTH + +if ARM_DMA_USE_IOMMU + +config ARM_DMA_IOMMU_ALIGNMENT + int "Maximum PAGE_SIZE order of alignment for DMA IOMMU buffers" + range 4 9 + default 8 + help + DMA mapping framework by default aligns all buffers to the smallest + PAGE_SIZE order which is greater than or equal to the requested buffer + size. This works well for buffers up to a few hundreds kilobytes, but + for larger buffers it just a waste of address space. Drivers which has + relatively small addressing window (like 64Mib) might run out of + virtual space with just a few allocations. + + With this parameter you can specify the maximum PAGE_SIZE order for + DMA IOMMU buffers. Larger buffers will be aligned only to this + specified order. The order is expressed as a power of two multiplied + by the PAGE_SIZE. 
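The ARM_DMA_IOMMU_ALIGNMENT help text above describes rounding a DMA buffer up to the smallest PAGE_SIZE order that covers it, capped at a configurable maximum order. A small stand-alone sketch of that rounding rule follows; the 4 KiB page size and the cap of 8 are made-up example values, and this is only an illustration of the arithmetic, not the kernel's mapping code:

#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT	12	/* hypothetical 4 KiB pages */
#define EXAMPLE_MAX_ORDER	8	/* hypothetical configured cap */

/* Smallest order such that (PAGE_SIZE << order) >= size, clamped to the cap. */
static unsigned int example_iommu_order(unsigned long size)
{
	unsigned int order = 0;

	while (((unsigned long)1 << (EXAMPLE_PAGE_SHIFT + order)) < size &&
	       order < EXAMPLE_MAX_ORDER)
		order++;
	return order;
}

int main(void)
{
	/* 300 KiB rounds up to order 7 (512 KiB); 4 MiB is clamped to the order-8 cap (1 MiB). */
	printf("%u %u\n", example_iommu_order(300 * 1024UL),
	       example_iommu_order(4 * 1024 * 1024UL));
	return 0;
}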
+ +endif + +config SYS_SUPPORTS_APM_EMULATION + bool + +config HAVE_TCM + bool + select GENERIC_ALLOCATOR + +config HAVE_PROC_CPU + bool + +config NO_IOPORT_MAP + bool + +config SBUS + bool + +config STACKTRACE_SUPPORT + bool + default y + +config LOCKDEP_SUPPORT + bool + default y + +config ARCH_HAS_ILOG2_U32 + bool + +config ARCH_HAS_ILOG2_U64 + bool + +config ARCH_HAS_BANDGAP + bool + +config FIX_EARLYCON_MEM + def_bool y if MMU + +config GENERIC_HWEIGHT + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config ARCH_MAY_HAVE_PC_FDC + bool + +config ARCH_SUPPORTS_UPROBES + def_bool y + +config ARCH_HAS_DMA_SET_COHERENT_MASK + bool + +config GENERIC_ISA_DMA + bool + +config FIQ + bool + +config NEED_RET_TO_USER + bool + +config ARCH_MTD_XIP + bool + +config ARM_PATCH_PHYS_VIRT + bool "Patch physical to virtual translations at runtime" if EMBEDDED + default y + depends on !XIP_KERNEL && MMU + help + Patch phys-to-virt and virt-to-phys translation functions at + boot and module load time according to the position of the + kernel in system memory. + + This can only be used with non-XIP MMU kernels where the base + of physical memory is at a 2 MiB boundary. + + Only disable this option if you know that you do not require + this feature (eg, building a kernel for a single machine) and + you need to shrink the kernel to the minimal size. + +config NEED_MACH_IO_H + bool + help + Select this when mach/io.h is required to provide special + definitions for this platform. The need for mach/io.h should + be avoided when possible. + +config NEED_MACH_MEMORY_H + bool + help + Select this when mach/memory.h is required to provide special + definitions for this platform. The need for mach/memory.h should + be avoided when possible. + +config PHYS_OFFSET + hex "Physical address of main memory" if MMU + depends on !ARM_PATCH_PHYS_VIRT + default DRAM_BASE if !MMU + default 0x00000000 if ARCH_FOOTBRIDGE + default 0x10000000 if ARCH_OMAP1 || ARCH_RPC + default 0x20000000 if ARCH_S5PV210 + default 0xc0000000 if ARCH_SA1100 + help + Please provide the physical address corresponding to the + location of main memory in your system. + +config GENERIC_BUG + def_bool y + depends on BUG + +config PGTABLE_LEVELS + int + default 3 if ARM_LPAE + default 2 + +menu "System Type" + +config MMU + bool "MMU-based Paged Memory Management Support" + default y + help + Select if you want MMU-based virtualised addressing space + support by paged memory management. If unsure, say 'Y'. + +config ARCH_MMAP_RND_BITS_MIN + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 14 if PAGE_OFFSET=0x40000000 + default 15 if PAGE_OFFSET=0x80000000 + default 16 + +# +# The "ARM system type" choice list is ordered alphabetically by option +# text. Please add new entries in the option alphabetic order. 
+# +choice + prompt "ARM system type" + default ARM_SINGLE_ARMV7M if !MMU + default ARCH_MULTIPLATFORM if MMU + +config ARCH_MULTIPLATFORM + bool "Allow multiple platforms to be selected" + depends on MMU + select ARCH_FLATMEM_ENABLE + select ARCH_SPARSEMEM_ENABLE + select ARCH_SELECT_MEMORY_MODEL + select ARM_HAS_SG_CHAIN + select ARM_PATCH_PHYS_VIRT + select AUTO_ZRELADDR + select TIMER_OF + select COMMON_CLK + select GENERIC_IRQ_MULTI_HANDLER + select HAVE_PCI + select PCI_DOMAINS_GENERIC if PCI + select SPARSE_IRQ + select USE_OF + +config ARM_SINGLE_ARMV7M + bool "ARMv7-M based platforms (Cortex-M0/M3/M4)" + depends on !MMU + select ARM_NVIC + select AUTO_ZRELADDR + select TIMER_OF + select COMMON_CLK + select CPU_V7M + select NO_IOPORT_MAP + select SPARSE_IRQ + select USE_OF + +config ARCH_EP93XX + bool "EP93xx-based" + select ARCH_SPARSEMEM_ENABLE + select ARM_AMBA + imply ARM_PATCH_PHYS_VIRT + select ARM_VIC + select GENERIC_IRQ_MULTI_HANDLER + select AUTO_ZRELADDR + select CLKSRC_MMIO + select CPU_ARM920T + select GPIOLIB + select HAVE_LEGACY_CLK + help + This enables support for the Cirrus EP93xx series of CPUs. + +config ARCH_FOOTBRIDGE + bool "FootBridge" + select CPU_SA110 + select FOOTBRIDGE + select NEED_MACH_IO_H if !MMU + select NEED_MACH_MEMORY_H + help + Support for systems based on the DC21285 companion chip + ("FootBridge"), such as the Simtec CATS and the Rebel NetWinder. + +config ARCH_IOP32X + bool "IOP32x-based" + depends on MMU + select CPU_XSCALE + select GPIO_IOP + select GPIOLIB + select NEED_RET_TO_USER + select FORCE_PCI + select PLAT_IOP + help + Support for Intel's 80219 and IOP32X (XScale) family of + processors. + +config ARCH_IXP4XX + bool "IXP4xx-based" + depends on MMU + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_SUPPORTS_BIG_ENDIAN + select CPU_XSCALE + select DMABOUNCE if PCI + select GENERIC_IRQ_MULTI_HANDLER + select GPIO_IXP4XX + select GPIOLIB + select HAVE_PCI + select IXP4XX_IRQ + select IXP4XX_TIMER + # With the new PCI driver this is not needed + select NEED_MACH_IO_H if IXP4XX_PCI_LEGACY + select USB_EHCI_BIG_ENDIAN_DESC + select USB_EHCI_BIG_ENDIAN_MMIO + help + Support for Intel's IXP4XX (XScale) family of processors. + +config ARCH_DOVE + bool "Marvell Dove" + select CPU_PJ4 + select GENERIC_IRQ_MULTI_HANDLER + select GPIOLIB + select HAVE_PCI + select MVEBU_MBUS + select PINCTRL + select PINCTRL_DOVE + select PLAT_ORION_LEGACY + select SPARSE_IRQ + select PM_GENERIC_DOMAINS if PM + help + Support for the Marvell Dove SoC 88AP510 + +config ARCH_PXA + bool "PXA2xx/PXA3xx-based" + depends on MMU + select ARCH_MTD_XIP + select ARM_CPU_SUSPEND if PM + select AUTO_ZRELADDR + select COMMON_CLK + select CLKSRC_PXA + select CLKSRC_MMIO + select TIMER_OF + select CPU_XSCALE if !CPU_XSC3 + select GENERIC_IRQ_MULTI_HANDLER + select GPIO_PXA + select GPIOLIB + select IRQ_DOMAIN + select PLAT_PXA + select SPARSE_IRQ + help + Support for Intel/Marvell's PXA2xx/PXA3xx processor line. + +config ARCH_RPC + bool "RiscPC" + depends on MMU + select ARCH_ACORN + select ARCH_MAY_HAVE_PC_FDC + select ARCH_SPARSEMEM_ENABLE + select ARM_HAS_SG_CHAIN + select CPU_SA110 + select FIQ + select HAVE_PATA_PLATFORM + select ISA_DMA_API + select LEGACY_TIMER_TICK + select NEED_MACH_IO_H + select NEED_MACH_MEMORY_H + select NO_IOPORT_MAP + help + On the Acorn Risc-PC, Linux can support the internal IDE disk and + CD-ROM interface, serial and parallel port, and the floppy drive. 
+ +config ARCH_SA1100 + bool "SA1100-based" + select ARCH_MTD_XIP + select ARCH_SPARSEMEM_ENABLE + select CLKSRC_MMIO + select CLKSRC_PXA + select TIMER_OF if OF + select COMMON_CLK + select CPU_FREQ + select CPU_SA1100 + select GENERIC_IRQ_MULTI_HANDLER + select GPIOLIB + select IRQ_DOMAIN + select ISA + select NEED_MACH_MEMORY_H + select SPARSE_IRQ + help + Support for StrongARM 11x0 based boards. + +config ARCH_S3C24XX + bool "Samsung S3C24XX SoCs" + select ATAGS + select CLKSRC_SAMSUNG_PWM + select GPIO_SAMSUNG + select GPIOLIB + select GENERIC_IRQ_MULTI_HANDLER + select HAVE_S3C2410_I2C if I2C + select HAVE_S3C_RTC if RTC_CLASS + select NEED_MACH_IO_H + select S3C2410_WATCHDOG + select SAMSUNG_ATAGS + select USE_OF + select WATCHDOG + help + Samsung S3C2410, S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 + and S3C2450 SoCs based systems, such as the Simtec Electronics BAST + (), the IPAQ 1940 or the + Samsung SMDK2410 development board (and derivatives). + +config ARCH_OMAP1 + bool "TI OMAP1" + depends on MMU + select ARCH_OMAP + select CLKSRC_MMIO + select GENERIC_IRQ_CHIP + select GENERIC_IRQ_MULTI_HANDLER + select GPIOLIB + select HAVE_LEGACY_CLK + select IRQ_DOMAIN + select NEED_MACH_IO_H if PCCARD + select NEED_MACH_MEMORY_H + select SPARSE_IRQ + help + Support for older TI OMAP1 (omap7xx, omap15xx or omap16xx) + +endchoice + +menu "Multiple platform selection" + depends on ARCH_MULTIPLATFORM + +comment "CPU Core family selection" + +config ARCH_MULTI_V4 + bool "ARMv4 based platforms (FA526)" + depends on !ARCH_MULTI_V6_V7 + select ARCH_MULTI_V4_V5 + select CPU_FA526 + +config ARCH_MULTI_V4T + bool "ARMv4T based platforms (ARM720T, ARM920T, ...)" + depends on !ARCH_MULTI_V6_V7 + select ARCH_MULTI_V4_V5 + select CPU_ARM920T if !(CPU_ARM7TDMI || CPU_ARM720T || \ + CPU_ARM740T || CPU_ARM9TDMI || CPU_ARM922T || \ + CPU_ARM925T || CPU_ARM940T) + +config ARCH_MULTI_V5 + bool "ARMv5 based platforms (ARM926T, XSCALE, PJ1, ...)" + depends on !ARCH_MULTI_V6_V7 + select ARCH_MULTI_V4_V5 + select CPU_ARM926T if !(CPU_ARM946E || CPU_ARM1020 || \ + CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ + CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_FEROCEON) + +config ARCH_MULTI_V4_V5 + bool + +config ARCH_MULTI_V6 + bool "ARMv6 based platforms (ARM11)" + select ARCH_MULTI_V6_V7 + select CPU_V6K + +config ARCH_MULTI_V7 + bool "ARMv7 based platforms (Cortex-A, PJ4, Scorpion, Krait)" + default y + select ARCH_MULTI_V6_V7 + select CPU_V7 + select HAVE_SMP + +config ARCH_MULTI_V6_V7 + bool + select MIGHT_HAVE_CACHE_L2X0 + +config ARCH_MULTI_CPU_AUTO + def_bool !(ARCH_MULTI_V4 || ARCH_MULTI_V4T || ARCH_MULTI_V6_V7) + select ARCH_MULTI_V5 + +endmenu + +config ARCH_VIRT + bool "Dummy Virtual Machine" + depends on ARCH_MULTI_V7 + select ARM_AMBA + select ARM_GIC + select ARM_GIC_V2M if PCI + select ARM_GIC_V3 + select ARM_GIC_V3_ITS if PCI + select ARM_PSCI + select HAVE_ARM_ARCH_TIMER + select ARCH_SUPPORTS_BIG_ENDIAN + +# +# This is sorted alphabetically by mach-* pathname. However, plat-* +# Kconfigs may be included either alphabetically (according to the +# plat- suffix) or along side the corresponding mach-* source. 
+#
+source "arch/arm/mach-actions/Kconfig"
+
+source "arch/arm/mach-alpine/Kconfig"
+
+source "arch/arm/mach-artpec/Kconfig"
+
+source "arch/arm/mach-asm9260/Kconfig"
+
+source "arch/arm/mach-aspeed/Kconfig"
+
+source "arch/arm/mach-at91/Kconfig"
+
+source "arch/arm/mach-axxia/Kconfig"
+
+source "arch/arm/mach-bcm/Kconfig"
+
+source "arch/arm/mach-berlin/Kconfig"
+
+source "arch/arm/mach-clps711x/Kconfig"
+
+source "arch/arm/mach-cns3xxx/Kconfig"
+
+source "arch/arm/mach-davinci/Kconfig"
+
+source "arch/arm/mach-digicolor/Kconfig"
+
+source "arch/arm/mach-dove/Kconfig"
+
+source "arch/arm/mach-ep93xx/Kconfig"
+
+source "arch/arm/mach-exynos/Kconfig"
+
+source "arch/arm/mach-footbridge/Kconfig"
+
+source "arch/arm/mach-gemini/Kconfig"
+
+source "arch/arm/mach-highbank/Kconfig"
+
+source "arch/arm/mach-hisi/Kconfig"
+
+source "arch/arm/mach-imx/Kconfig"
+
+source "arch/arm/mach-integrator/Kconfig"
+
+source "arch/arm/mach-iop32x/Kconfig"
+
+source "arch/arm/mach-ixp4xx/Kconfig"
+
+source "arch/arm/mach-keystone/Kconfig"
+
+source "arch/arm/mach-lpc32xx/Kconfig"
+
+source "arch/arm/mach-mediatek/Kconfig"
+
+source "arch/arm/mach-meson/Kconfig"
+
+source "arch/arm/mach-milbeaut/Kconfig"
+
+source "arch/arm/mach-mmp/Kconfig"
+
+source "arch/arm/mach-moxart/Kconfig"
+
+source "arch/arm/mach-mstar/Kconfig"
+
+source "arch/arm/mach-mv78xx0/Kconfig"
+
+source "arch/arm/mach-mvebu/Kconfig"
+
+source "arch/arm/mach-mxs/Kconfig"
+
+source "arch/arm/mach-nomadik/Kconfig"
+
+source "arch/arm/mach-npcm/Kconfig"
+
+source "arch/arm/mach-nspire/Kconfig"
+
+source "arch/arm/plat-omap/Kconfig"
+
+source "arch/arm/mach-omap1/Kconfig"
+
+source "arch/arm/mach-omap2/Kconfig"
+
+source "arch/arm/mach-orion5x/Kconfig"
+
+source "arch/arm/mach-oxnas/Kconfig"
+
+source "arch/arm/mach-pxa/Kconfig"
+source "arch/arm/plat-pxa/Kconfig"
+
+source "arch/arm/mach-qcom/Kconfig"
+
+source "arch/arm/mach-rda/Kconfig"
+
+source "arch/arm/mach-realtek/Kconfig"
+
+source "arch/arm/mach-realview/Kconfig"
+
+source "arch/arm/mach-rockchip/Kconfig"
+
+source "arch/arm/mach-s3c/Kconfig"
+
+source "arch/arm/mach-s5pv210/Kconfig"
+
+source "arch/arm/mach-sa1100/Kconfig"
+
+source "arch/arm/mach-shmobile/Kconfig"
+
+source "arch/arm/mach-socfpga/Kconfig"
+
+source "arch/arm/mach-spear/Kconfig"
+
+source "arch/arm/mach-sti/Kconfig"
+
+source "arch/arm/mach-stm32/Kconfig"
+
+source "arch/arm/mach-sunxi/Kconfig"
+
+source "arch/arm/mach-tegra/Kconfig"
+
+source "arch/arm/mach-uniphier/Kconfig"
+
+source "arch/arm/mach-ux500/Kconfig"
+
+source "arch/arm/mach-versatile/Kconfig"
+
+source "arch/arm/mach-vexpress/Kconfig"
+
+source "arch/arm/mach-vt8500/Kconfig"
+
+source "arch/arm/mach-zynq/Kconfig"
+
+# ARMv7-M architecture
+config ARCH_LPC18XX
+    bool "NXP LPC18xx/LPC43xx"
+    depends on ARM_SINGLE_ARMV7M
+    select ARCH_HAS_RESET_CONTROLLER
+    select ARM_AMBA
+    select CLKSRC_LPC32XX
+    select PINCTRL
+    help
+      Support for NXP's LPC18xx Cortex-M3 and LPC43xx Cortex-M4
+      high-performance microcontrollers.
+
+config ARCH_MPS2
+    bool "ARM MPS2 platform"
+    depends on ARM_SINGLE_ARMV7M
+    select ARM_AMBA
+    select CLKSRC_MPS2
+    help
+      Support for the Cortex-M Prototyping System (or V2M-MPS2), which comes
+      with a range of available cores like Cortex-M3/M4/M7.
+
+      Please note that, depending on which Application Note is used, the
+      memory map for the platform may vary, so an adjustment of the RAM
+      base might be needed.
+ +# Definitions to make life easier +config ARCH_ACORN + bool + +config PLAT_IOP + bool + +config PLAT_ORION + bool + select CLKSRC_MMIO + select COMMON_CLK + select GENERIC_IRQ_CHIP + select IRQ_DOMAIN + +config PLAT_ORION_LEGACY + bool + select PLAT_ORION + +config PLAT_PXA + bool + +config PLAT_VERSATILE + bool + +source "arch/arm/mm/Kconfig" + +config IWMMXT + bool "Enable iWMMXt support" + depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4 || CPU_PJ4B + default y if PXA27x || PXA3xx || ARCH_MMP || CPU_PJ4 || CPU_PJ4B + help + Enable support for iWMMXt context switching at run time if + running on a CPU that supports it. + +if !MMU +source "arch/arm/Kconfig-nommu" +endif + +config PJ4B_ERRATA_4742 + bool "PJ4B Errata 4742: IDLE Wake Up Commands can Cause the CPU Core to Cease Operation" + depends on CPU_PJ4B && MACH_ARMADA_370 + default y + help + When coming out of either a Wait for Interrupt (WFI) or a Wait for + Event (WFE) IDLE states, a specific timing sensitivity exists between + the retiring WFI/WFE instructions and the newly issued subsequent + instructions. This sensitivity can result in a CPU hang scenario. + Workaround: + The software must insert either a Data Synchronization Barrier (DSB) + or Data Memory Barrier (DMB) command immediately after the WFI/WFE + instruction + +config ARM_ERRATA_326103 + bool "ARM errata: FSR write bit incorrect on a SWP to read-only memory" + depends on CPU_V6 + help + Executing a SWP instruction to read-only memory does not set bit 11 + of the FSR on the ARM 1136 prior to r1p0. This causes the kernel to + treat the access as a read, preventing a COW from occurring and + causing the faulting task to livelock. + +config ARM_ERRATA_411920 + bool "ARM errata: Invalidation of the Instruction Cache operation can fail" + depends on CPU_V6 || CPU_V6K + help + Invalidation of the Instruction Cache operation can + fail. This erratum is present in 1136 (before r1p4), 1156 and 1176. + It does not affect the MPCore. This option enables the ARM Ltd. + recommended workaround. + +config ARM_ERRATA_430973 + bool "ARM errata: Stale prediction on replaced interworking branch" + depends on CPU_V7 + help + This option enables the workaround for the 430973 Cortex-A8 + r1p* erratum. If a code sequence containing an ARM/Thumb + interworking branch is replaced with another code sequence at the + same virtual address, whether due to self-modifying code or virtual + to physical address re-mapping, Cortex-A8 does not recover from the + stale interworking branch prediction. This results in Cortex-A8 + executing the new code sequence in the incorrect ARM or Thumb state. + The workaround enables the BTB/BTAC operations by setting ACTLR.IBE + and also flushes the branch target cache at every context switch. + Note that setting specific bits in the ACTLR register may not be + available in non-secure mode. + +config ARM_ERRATA_458693 + bool "ARM errata: Processor deadlock when a false hazard is created" + depends on CPU_V7 + depends on !ARCH_MULTIPLATFORM + help + This option enables the workaround for the 458693 Cortex-A8 (r2p0) + erratum. For very specific sequences of memory operations, it is + possible for a hazard condition intended for a cache line to instead + be incorrectly associated with a different cache line. This false + hazard might then cause a processor deadlock. The workaround enables + the L1 caching of the NEON accesses and disables the PLD instruction + in the ACTLR register. 
Note that setting specific bits in the ACTLR + register may not be available in non-secure mode. + +config ARM_ERRATA_460075 + bool "ARM errata: Data written to the L2 cache can be overwritten with stale data" + depends on CPU_V7 + depends on !ARCH_MULTIPLATFORM + help + This option enables the workaround for the 460075 Cortex-A8 (r2p0) + erratum. Any asynchronous access to the L2 cache may encounter a + situation in which recent store transactions to the L2 cache are lost + and overwritten with stale memory contents from external memory. The + workaround disables the write-allocate mode for the L2 cache via the + ACTLR register. Note that setting specific bits in the ACTLR register + may not be available in non-secure mode. + +config ARM_ERRATA_742230 + bool "ARM errata: DMB operation may be faulty" + depends on CPU_V7 && SMP + depends on !ARCH_MULTIPLATFORM + help + This option enables the workaround for the 742230 Cortex-A9 + (r1p0..r2p2) erratum. Under rare circumstances, a DMB instruction + between two write operations may not ensure the correct visibility + ordering of the two writes. This workaround sets a specific bit in + the diagnostic register of the Cortex-A9 which causes the DMB + instruction to behave as a DSB, ensuring the correct behaviour of + the two writes. + +config ARM_ERRATA_742231 + bool "ARM errata: Incorrect hazard handling in the SCU may lead to data corruption" + depends on CPU_V7 && SMP + depends on !ARCH_MULTIPLATFORM + help + This option enables the workaround for the 742231 Cortex-A9 + (r2p0..r2p2) erratum. Under certain conditions, specific to the + Cortex-A9 MPCore micro-architecture, two CPUs working in SMP mode, + accessing some data located in the same cache line, may get corrupted + data due to bad handling of the address hazard when the line gets + replaced from one of the CPUs at the same time as another CPU is + accessing it. This workaround sets specific bits in the diagnostic + register of the Cortex-A9 which reduces the linefill issuing + capabilities of the processor. + +config ARM_ERRATA_643719 + bool "ARM errata: LoUIS bit field in CLIDR register is incorrect" + depends on CPU_V7 && SMP + default y + help + This option enables the workaround for the 643719 Cortex-A9 (prior to + r1p0) erratum. On affected cores the LoUIS bit field of the CLIDR + register returns zero when it should return one. The workaround + corrects this value, ensuring cache maintenance operations which use + it behave as intended and avoiding data corruption. + +config ARM_ERRATA_720789 + bool "ARM errata: TLBIASIDIS and TLBIMVAIS operations can broadcast a faulty ASID" + depends on CPU_V7 + help + This option enables the workaround for the 720789 Cortex-A9 (prior to + r2p0) erratum. A faulty ASID can be sent to the other CPUs for the + broadcasted CP15 TLB maintenance operations TLBIASIDIS and TLBIMVAIS. + As a consequence of this erratum, some TLB entries which should be + invalidated are not, resulting in an incoherency in the system page + tables. The workaround changes the TLB flushing routines to invalidate + entries regardless of the ASID. + +config ARM_ERRATA_743622 + bool "ARM errata: Faulty hazard checking in the Store Buffer may lead to data corruption" + depends on CPU_V7 + depends on !ARCH_MULTIPLATFORM + help + This option enables the workaround for the 743622 Cortex-A9 + (r2p*) erratum. Under very rare conditions, a faulty + optimisation in the Cortex-A9 Store Buffer may lead to data + corruption. 
This workaround sets a specific bit in the diagnostic + register of the Cortex-A9 which disables the Store Buffer + optimisation, preventing the defect from occurring. This has no + visible impact on the overall performance or power consumption of the + processor. + +config ARM_ERRATA_751472 + bool "ARM errata: Interrupted ICIALLUIS may prevent completion of broadcasted operation" + depends on CPU_V7 + depends on !ARCH_MULTIPLATFORM + help + This option enables the workaround for the 751472 Cortex-A9 (prior + to r3p0) erratum. An interrupted ICIALLUIS operation may prevent the + completion of a following broadcasted operation if the second + operation is received by a CPU before the ICIALLUIS has completed, + potentially leading to corrupted entries in the cache or TLB. + +config ARM_ERRATA_754322 + bool "ARM errata: possible faulty MMU translations following an ASID switch" + depends on CPU_V7 + help + This option enables the workaround for the 754322 Cortex-A9 (r2p*, + r3p*) erratum. A speculative memory access may cause a page table walk + which starts prior to an ASID switch but completes afterwards. This + can populate the micro-TLB with a stale entry which may be hit with + the new ASID. This workaround places two dsb instructions in the mm + switching code so that no page table walks can cross the ASID switch. + +config ARM_ERRATA_754327 + bool "ARM errata: no automatic Store Buffer drain" + depends on CPU_V7 && SMP + help + This option enables the workaround for the 754327 Cortex-A9 (prior to + r2p0) erratum. The Store Buffer does not have any automatic draining + mechanism and therefore a livelock may occur if an external agent + continuously polls a memory location waiting to observe an update. + This workaround defines cpu_relax() as smp_mb(), preventing correctly + written polling loops from denying visibility of updates to memory. + +config ARM_ERRATA_364296 + bool "ARM errata: Possible cache data corruption with hit-under-miss enabled" + depends on CPU_V6 + help + This options enables the workaround for the 364296 ARM1136 + r0p2 erratum (possible cache data corruption with + hit-under-miss enabled). It sets the undocumented bit 31 in + the auxiliary control register and the FI bit in the control + register, thus disabling hit-under-miss without putting the + processor into full low interrupt latency mode. ARM11MPCore + is not affected. + +config ARM_ERRATA_764369 + bool "ARM errata: Data cache line maintenance operation by MVA may not succeed" + depends on CPU_V7 && SMP + help + This option enables the workaround for erratum 764369 + affecting Cortex-A9 MPCore with two or more processors (all + current revisions). Under certain timing circumstances, a data + cache line maintenance operation by MVA targeting an Inner + Shareable memory region may fail to proceed up to either the + Point of Coherency or to the Point of Unification of the + system. This workaround adds a DSB instruction before the + relevant cache maintenance functions and sets a specific bit + in the diagnostic control register of the SCU. + +config ARM_ERRATA_775420 + bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock" + depends on CPU_V7 + help + This option enables the workaround for the 775420 Cortex-A9 (r2p2, + r2p6,r2p8,r2p10,r3p0) erratum. In case a data cache maintenance + operation aborts with MMU exception, it might cause the processor + to deadlock. This workaround puts DSB before executing ISB if + an abort may occur on cache maintenance. 
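For illustration only (not part of the patch): the ARM_ERRATA_754327 entry above says the workaround turns cpu_relax() into smp_mb(). The sketch below shows the kind of cross-CPU polling loop that help text has in mind; demo_flag, demo_publish() and demo_wait() are invented names.

#include <linux/compiler.h>
#include <asm/processor.h>

static int demo_flag;			/* hypothetical flag shared by two CPUs */

static void demo_publish(void)		/* runs on CPU A */
{
	WRITE_ONCE(demo_flag, 1);	/* the store the other CPU is polling for */
}

static void demo_wait(void)		/* runs on CPU B */
{
	/*
	 * With ARM_ERRATA_754327, cpu_relax() expands to smp_mb(), so a tight
	 * polling loop like this cannot keep the update from ever becoming
	 * visible on cores whose Store Buffer has no automatic drain.
	 */
	while (!READ_ONCE(demo_flag))
		cpu_relax();
}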
+
+config ARM_ERRATA_798181
+    bool "ARM errata: TLBI/DSB failure on Cortex-A15"
+    depends on CPU_V7 && SMP
+    help
+      On Cortex-A15 (r0p0..r3p2) the TLBI*IS/DSB operations are not
+      adequately shooting down all use of the old entries. This
+      option enables the Linux kernel workaround for this erratum
+      which sends an IPI to the CPUs that are running the same ASID
+      as the one being invalidated.
+
+config ARM_ERRATA_773022
+    bool "ARM errata: incorrect instructions may be executed from loop buffer"
+    depends on CPU_V7
+    help
+      This option enables the workaround for the 773022 Cortex-A15
+      (up to r0p4) erratum. In certain rare sequences of code, the
+      loop buffer may deliver incorrect instructions. This
+      workaround disables the loop buffer to avoid the erratum.
+
+config ARM_ERRATA_818325_852422
+    bool "ARM errata: A12: some seqs of opposed cond code instrs => deadlock or corruption"
+    depends on CPU_V7
+    help
+      This option enables the workaround for:
+      - Cortex-A12 818325: Execution of an UNPREDICTABLE STR or STM
+        instruction might deadlock. Fixed in r0p1.
+      - Cortex-A12 852422: Execution of a sequence of instructions might
+        lead to either a data corruption or a CPU deadlock. Not fixed in
+        any Cortex-A12 cores yet.
+      The workaround for both errata involves setting bit[12] of the
+      Feature Register. This bit disables an optimisation applied to a
+      sequence of 2 instructions that use opposing condition codes.
+
+config ARM_ERRATA_821420
+    bool "ARM errata: A12: sequence of VMOV to core registers might lead to a deadlock"
+    depends on CPU_V7
+    help
+      This option enables the workaround for the 821420 Cortex-A12
+      (all revs) erratum. In very rare timing conditions, a sequence
+      of VMOV to Core registers instructions, for which the second
+      one is in the shadow of a branch or abort, can lead to a
+      deadlock when the VMOV instructions are issued out-of-order.
+
+config ARM_ERRATA_825619
+    bool "ARM errata: A12: DMB NSHST/ISHST mixed ... might cause deadlock"
+    depends on CPU_V7
+    help
+      This option enables the workaround for the 825619 Cortex-A12
+      (all revs) erratum. Within rare timing constraints, executing a
+      DMB NSHST or DMB ISHST instruction followed by a mix of Cacheable
+      and Device/Strongly-Ordered loads and stores might cause a deadlock.
+
+config ARM_ERRATA_857271
+    bool "ARM errata: A12: CPU might deadlock under some very rare internal conditions"
+    depends on CPU_V7
+    help
+      This option enables the workaround for the 857271 Cortex-A12
+      (all revs) erratum. Under very rare timing conditions, the CPU might
+      hang. The workaround is expected to have a < 1% performance impact.
+
+config ARM_ERRATA_852421
+    bool "ARM errata: A17: DMB ST might fail to create order between stores"
+    depends on CPU_V7
+    help
+      This option enables the workaround for the 852421 Cortex-A17
+      (r1p0, r1p1, r1p2) erratum. Under very rare timing conditions,
+      execution of a DMB ST instruction might fail to properly order
+      stores from GroupA and stores from GroupB.
+
+config ARM_ERRATA_852423
+    bool "ARM errata: A17: some seqs of opposed cond code instrs => deadlock or corruption"
+    depends on CPU_V7
+    help
+      This option enables the workaround for:
+      - Cortex-A17 852423: Execution of a sequence of instructions might
+        lead to either a data corruption or a CPU deadlock. Not fixed in
+        any Cortex-A17 cores yet.
+      This is identical to Cortex-A12 erratum 852422. It is a separate
+      config option from the A12 erratum due to the way errata are checked
+      for and handled.
+
+config ARM_ERRATA_857272
+    bool "ARM errata: A17: CPU might deadlock under some very rare internal conditions"
+    depends on CPU_V7
+    help
+      This option enables the workaround for the 857272 Cortex-A17 erratum.
+      This erratum is not known to be fixed in any A17 revision.
+      This is identical to Cortex-A12 erratum 857271. It is a separate
+      config option from the A12 erratum due to the way errata are checked
+      for and handled.
+
+endmenu
+
+source "arch/arm/common/Kconfig"
+
+menu "Bus support"
+
+config ISA
+    bool
+    help
+      Find out whether you have ISA slots on your motherboard. ISA is the
+      name of a bus system, i.e. the way the CPU talks to the other stuff
+      inside your box. Other bus systems are PCI, EISA, MicroChannel
+      (MCA) or VESA. ISA is an older system, now being displaced by PCI;
+      newer boards don't support it. If you have ISA, say Y, otherwise N.
+
+# Select ISA DMA controller support
+config ISA_DMA
+    bool
+    select ISA_DMA_API
+
+# Select ISA DMA interface
+config ISA_DMA_API
+    bool
+
+config PCI_NANOENGINE
+    bool "BSE nanoEngine PCI support"
+    depends on SA1100_NANOENGINE
+    help
+      Enable PCI on the BSE nanoEngine board.
+
+config ARM_ERRATA_814220
+    bool "ARM errata: Cache maintenance by set/way operations can execute out of order"
+    depends on CPU_V7
+    help
+      The ARMv7 Architecture Reference Manual states that all cache and
+      branch predictor maintenance operations that do not specify an
+      address execute, relative to each other, in program order.
+      However, because of this erratum, an L2 set/way cache maintenance
+      operation can overtake an L1 set/way cache maintenance operation.
+      This erratum affects only the Cortex-A7 and is present in r0p2,
+      r0p3, r0p4 and r0p5.
+
+endmenu
+
+menu "Kernel Features"
+
+config HAVE_SMP
+    bool
+    help
+      This option should be selected by machines which have an SMP-
+      capable CPU.
+
+      The only effect of this option is to make the SMP-related
+      options available to the user for configuration.
+
+config SMP
+    bool "Symmetric Multi-Processing"
+    depends on CPU_V6K || CPU_V7
+    depends on HAVE_SMP
+    depends on MMU || ARM_MPU
+    select IRQ_WORK
+    help
+      This enables support for systems with more than one CPU. If you have
+      a system with only one CPU, say N. If you have a system with more
+      than one CPU, say Y.
+
+      If you say N here, the kernel will run on uni- and multiprocessor
+      machines, but will use only one CPU of a multiprocessor machine. If
+      you say Y here, the kernel will run on many, but not all,
+      uniprocessor machines. On a uniprocessor machine, the kernel
+      will run faster if you say N here.
+
+      See also , and the SMP-HOWTO available at .
+
+      If you don't know what to do here, say N.
+
+config SMP_ON_UP
+    bool "Allow booting SMP kernel on uniprocessor systems"
+    depends on SMP && !XIP_KERNEL && MMU
+    default y
+    help
+      SMP kernels contain instructions which fail on non-SMP processors.
+      Enabling this option allows the kernel to modify itself to make
+      these instructions safe. Disabling it allows about 1K of space
+      savings.
+
+      If you don't know what to do here, say Y.
+
+config ARM_CPU_TOPOLOGY
+    bool "Support cpu topology definition"
+    depends on SMP && CPU_V7
+    default y
+    help
+      Support ARM cpu topology definition. The MPIDR register defines
+      affinity between processors which is then used to describe the cpu
+      topology of an ARM System.
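To make the ARM_CPU_TOPOLOGY note above concrete (sketch only, not part of the patch): the MPIDR affinity levels are plain bitfields, as decoded below on ARMv7; demo_show_affinity() is an invented helper name.

#include <linux/types.h>
#include <linux/printk.h>
#include <asm/cputype.h>

static void demo_show_affinity(void)
{
	u32 mpidr = read_cpuid_mpidr();		/* CP15 MPIDR of this CPU */
	u32 aff0  = mpidr & 0xff;		/* core within a cluster */
	u32 aff1  = (mpidr >> 8) & 0xff;	/* cluster */
	u32 aff2  = (mpidr >> 16) & 0xff;	/* higher-level grouping */

	pr_info("MPIDR=%08x aff0=%u aff1=%u aff2=%u\n", mpidr, aff0, aff1, aff2);
}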
+ +config SCHED_MC + bool "Multi-core scheduler support" + depends on ARM_CPU_TOPOLOGY + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +config SCHED_SMT + bool "SMT scheduler support" + depends on ARM_CPU_TOPOLOGY + help + Improves the CPU scheduler's decision making when dealing with + MultiThreading at a cost of slightly increased overhead in some + places. If unsure say N here. + +config HAVE_ARM_SCU + bool + help + This option enables support for the ARM snoop control unit + +config HAVE_ARM_ARCH_TIMER + bool "Architected timer support" + depends on CPU_V7 + select ARM_ARCH_TIMER + help + This option enables support for the ARM architected timer + +config HAVE_ARM_TWD + bool + help + This options enables support for the ARM timer and watchdog unit + +config MCPM + bool "Multi-Cluster Power Management" + depends on CPU_V7 && SMP + help + This option provides the common power management infrastructure + for (multi-)cluster based systems, such as big.LITTLE based + systems. + +config MCPM_QUAD_CLUSTER + bool + depends on MCPM + help + To avoid wasting resources unnecessarily, MCPM only supports up + to 2 clusters by default. + Platforms with 3 or 4 clusters that use MCPM must select this + option to allow the additional clusters to be managed. + +config BIG_LITTLE + bool "big.LITTLE support (Experimental)" + depends on CPU_V7 && SMP + select MCPM + help + This option enables support selections for the big.LITTLE + system architecture. + +config BL_SWITCHER + bool "big.LITTLE switcher support" + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU && ARM_GIC + select CPU_PM + help + The big.LITTLE "switcher" provides the core functionality to + transparently handle transition between a cluster of A15's + and a cluster of A7's in a big.LITTLE system. + +config BL_SWITCHER_DUMMY_IF + tristate "Simple big.LITTLE switcher user interface" + depends on BL_SWITCHER && DEBUG_KERNEL + help + This is a simple and dummy char dev interface to control + the big.LITTLE switcher core code. It is meant for + debugging purposes only. + +choice + prompt "Memory split" + depends on MMU + default VMSPLIT_3G + help + Select the desired split between kernel and user memory. + + If you are not absolutely sure what you are doing, leave this + option alone! + + config VMSPLIT_3G + bool "3G/1G user/kernel split" + config VMSPLIT_3G_OPT + depends on !ARM_LPAE + bool "3G/1G user/kernel split (for full 1G low memory)" + config VMSPLIT_2G + bool "2G/2G user/kernel split" + config VMSPLIT_1G + bool "1G/3G user/kernel split" +endchoice + +config PAGE_OFFSET + hex + default PHYS_OFFSET if !MMU + default 0x40000000 if VMSPLIT_1G + default 0x80000000 if VMSPLIT_2G + default 0xB0000000 if VMSPLIT_3G_OPT + default 0xC0000000 + +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0x1f000000 if PAGE_OFFSET=0x40000000 + default 0x5f000000 if PAGE_OFFSET=0x80000000 + default 0x9f000000 if PAGE_OFFSET=0xC0000000 + default 0x8f000000 if PAGE_OFFSET=0xB0000000 + default 0xffffffff + +config NR_CPUS + int "Maximum number of CPUs (2-32)" + range 2 16 if DEBUG_KMAP_LOCAL + range 2 32 if !DEBUG_KMAP_LOCAL + depends on SMP + default "4" + help + The maximum number of CPUs that the kernel can support. + Up to 32 CPUs can be supported, or up to 16 if kmap_local() + debugging is enabled, which uses half of the per-CPU fixmap + slots as guard regions. 
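As a worked illustration of the memory-split options above (not part of the patch): with VMSPLIT_3G, PAGE_OFFSET is 0xC0000000 and lowmem is linearly mapped, so translating a lowmem kernel virtual address to its physical address is pure arithmetic. The sketch ignores ARM_PATCH_PHYS_VIRT run-time patching, which changes the mechanics but not the result; demo_lowmem_pa() is an invented name.

#include <linux/types.h>
#include <asm/memory.h>

static phys_addr_t demo_lowmem_pa(unsigned long vaddr)
{
	/*
	 * Example with PHYS_OFFSET = 0x80000000 and PAGE_OFFSET = 0xC0000000:
	 * the kernel pointer 0xC0100000 maps to physical 0x80100000.
	 * __pa()/__va() implement the same linear relation for lowmem.
	 */
	return (phys_addr_t)(vaddr - PAGE_OFFSET + PHYS_OFFSET);
}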
+ +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + select GENERIC_IRQ_MIGRATION + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + +config ARM_PSCI + bool "Support for the ARM Power State Coordination Interface (PSCI)" + depends on HAVE_ARM_SMCCC + select ARM_PSCI_FW + help + Say Y here if you want Linux to communicate with system firmware + implementing the PSCI specification for CPU-centric power + management operations described in ARM document number ARM DEN + 0022A ("Power State Coordination Interface System Software on + ARM processors"). + +# The GPIO number here must be sorted by descending number. In case of +# a multiplatform kernel, we just want the highest value required by the +# selected platforms. +config ARCH_NR_GPIO + int + default 2048 if ARCH_INTEL_SOCFPGA + default 1024 if ARCH_BRCMSTB || ARCH_RENESAS || ARCH_TEGRA || \ + ARCH_ZYNQ || ARCH_ASPEED + default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \ + SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210 + default 416 if ARCH_SUNXI + default 392 if ARCH_U8500 + default 352 if ARCH_VT8500 + default 288 if ARCH_ROCKCHIP + default 264 if MACH_H4700 + default 0 + help + Maximum number of GPIOs in the system. + + If unsure, leave the default value. + +config HZ_FIXED + int + default 128 if SOC_AT91RM9200 + default 0 + +choice + depends on HZ_FIXED = 0 + prompt "Timer frequency" + +config HZ_100 + bool "100 Hz" + +config HZ_200 + bool "200 Hz" + +config HZ_250 + bool "250 Hz" + +config HZ_300 + bool "300 Hz" + +config HZ_500 + bool "500 Hz" + +config HZ_1000 + bool "1000 Hz" + +endchoice + +config HZ + int + default HZ_FIXED if HZ_FIXED != 0 + default 100 if HZ_100 + default 200 if HZ_200 + default 250 if HZ_250 + default 300 if HZ_300 + default 500 if HZ_500 + default 1000 + +config SCHED_HRTICK + def_bool HIGH_RES_TIMERS + +config THUMB2_KERNEL + bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY + depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K + default y if CPU_THUMBONLY + select ARM_UNWIND + help + By enabling this option, the kernel will be compiled in + Thumb-2 mode. + + If unsure, say N. + +config ARM_PATCH_IDIV + bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()" + depends on CPU_32v7 && !XIP_KERNEL + default y + help + The ARM compiler inserts calls to __aeabi_idiv() and + __aeabi_uidiv() when it needs to perform division on signed + and unsigned integers. Some v7 CPUs have support for the sdiv + and udiv instructions that can be used to implement those + functions. + + Enabling this option allows the kernel to modify itself to + replace the first two instructions of these library functions + with the sdiv or udiv plus "bx lr" instructions when the CPU + it is running on supports them. Typically this will be faster + and less power intensive than running the original library + code to do integer division. + +config AEABI + bool "Use the ARM EABI to compile the kernel" if !CPU_V7 && \ + !CPU_V7M && !CPU_V6 && !CPU_V6K && !CC_IS_CLANG + default CPU_V7 || CPU_V7M || CPU_V6 || CPU_V6K || CC_IS_CLANG + help + This option allows for the kernel to be compiled using the latest + ARM ABI (aka EABI). This is only useful if you are using a user + space environment that is also compiled with EABI. 
+ + Since there are major incompatibilities between the legacy ABI and + EABI, especially with regard to structure member alignment, this + option also changes the kernel syscall calling convention to + disambiguate both ABIs and allow for backward compatibility support + (selected with CONFIG_OABI_COMPAT). + + To use this you need GCC version 4.0.0 or later. + +config OABI_COMPAT + bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)" + depends on AEABI && !THUMB2_KERNEL + help + This option preserves the old syscall interface along with the + new (ARM EABI) one. It also provides a compatibility layer to + intercept syscalls that have structure arguments which layout + in memory differs between the legacy ABI and the new ARM EABI + (only for non "thumb" binaries). This option adds a tiny + overhead to all syscalls and produces a slightly larger kernel. + + The seccomp filter system will not be available when this is + selected, since there is no way yet to sensibly distinguish + between calling conventions during filtering. + + If you know you'll be using only pure EABI user space then you + can say N here. If this option is not selected and you attempt + to execute a legacy ABI binary then the result will be + UNPREDICTABLE (in fact it can be predicted that it won't work + at all). If in doubt say N. + +config ARCH_SELECT_MEMORY_MODEL + bool + +config ARCH_FLATMEM_ENABLE + bool + +config ARCH_SPARSEMEM_ENABLE + bool + select SPARSEMEM_STATIC if SPARSEMEM + +config HIGHMEM + bool "High Memory Support" + depends on MMU + select KMAP_LOCAL + select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY + help + The address space of ARM processors is only 4 Gigabytes large + and it has to accommodate user address space, kernel address + space as well as some memory mapped IO. That means that, if you + have a large amount of physical memory and/or IO, not all of the + memory can be "permanently mapped" by the kernel. The physical + memory that is not permanently mapped is called "high memory". + + Depending on the selected kernel/user memory split, minimum + vmalloc space and actual amount of RAM, you may not need this + option which should result in a slightly faster kernel. + + If unsure, say n. + +config HIGHPTE + bool "Allocate 2nd-level pagetables from highmem" if EXPERT + depends on HIGHMEM + default y + help + The VM uses one page of physical memory for each page table. + For systems with a lot of processes, this can use a lot of + precious low memory, eventually leading to low memory being + consumed by page tables. Setting this option will allow + user-space 2nd level page tables to reside in high memory. + +config CPU_SW_DOMAIN_PAN + bool "Enable use of CPU domains to implement privileged no-access" + depends on MMU && !ARM_LPAE + default y + help + Increase kernel security by ensuring that normal kernel accesses + are unable to access userspace addresses. This can help prevent + use-after-free bugs becoming an exploitable privilege escalation + by ensuring that magic values (such as LIST_POISON) will always + fault when dereferenced. + + CPUs with low-vector mappings use a best-efforts implementation. + Their lower 1MB needs to remain accessible for the vectors, but + the remainder of userspace will become appropriately inaccessible. 
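Since HIGHMEM above relies on KMAP_LOCAL, a minimal sketch of touching a highmem page from kernel code may help (illustrative only, not part of the patch; demo_zero_page() is an invented name).

#include <linux/highmem.h>
#include <linux/string.h>

static void demo_zero_page(struct page *page)
{
	/* Highmem pages have no permanent kernel mapping, so map briefly. */
	void *vaddr = kmap_local_page(page);

	memset(vaddr, 0, PAGE_SIZE);
	kunmap_local(vaddr);		/* tear down the temporary, per-CPU mapping */
}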
+
+config HW_PERF_EVENTS
+    def_bool y
+    depends on ARM_PMU
+
+config ARCH_WANT_GENERAL_HUGETLB
+    def_bool y
+
+config ARM_MODULE_PLTS
+    bool "Use PLTs to allow module memory to spill over into vmalloc area"
+    depends on MODULES
+    default y
+    help
+      Allocate PLTs when loading modules so that jumps and calls whose
+      targets are too far away for their relative offsets to be encoded
+      in the instructions themselves can be bounced via veneers in the
+      module's PLT. This allows modules to be allocated in the generic
+      vmalloc area after the dedicated module memory area has been
+      exhausted. The modules will use slightly more memory, but after
+      rounding up to page size, the actual memory footprint is usually
+      the same.
+
+      Disabling this is usually safe for small single-platform
+      configurations. If unsure, say y.
+
+config FORCE_MAX_ZONEORDER
+    int "Maximum zone order"
+    default "12" if SOC_AM33XX
+    default "9" if SA1111
+    default "11"
+    help
+      The kernel memory allocator divides physically contiguous memory
+      blocks into "zones", where each zone is a power of two number of
+      pages. This option selects the largest power of two that the kernel
+      keeps in the memory allocator. If you need to allocate very large
+      blocks of physically contiguous memory, then you may need to
+      increase this value.
+
+      This config option is actually maximum order plus one. For example,
+      a value of 11 means that the largest free memory block is 2^10 pages.
+
+config ALIGNMENT_TRAP
+    def_bool CPU_CP15_MMU
+    select HAVE_PROC_CPU if PROC_FS
+    help
+      ARM processors cannot fetch/store information which is not
+      naturally aligned on the bus, i.e., a 4-byte fetch must start at an
+      address divisible by 4. On 32-bit ARM processors, these non-aligned
+      fetch/store instructions will be emulated in software if you say
+      Y here, which has a severe performance impact. This is necessary for
+      correct operation of some network protocols. With an IP-only
+      configuration it is safe to say N, otherwise say Y.
+
+config UACCESS_WITH_MEMCPY
+    bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user()"
+    depends on MMU
+    default y if CPU_FEROCEON
+    help
+      Implement faster copy_to_user and clear_user methods for CPU
+      cores where an 8-word STM instruction gives significantly higher
+      memory write throughput than a sequence of individual 32-bit stores.
+
+      A possible side effect is a slight increase in scheduling latency
+      between threads sharing the same address space if they invoke
+      such copy operations with large buffers.
+
+      However, if the CPU data cache is using a write-allocate mode,
+      this option is unlikely to provide any performance gain.
+
+config PARAVIRT
+    bool "Enable paravirtualization code"
+    help
+      This changes the kernel so it can modify itself when it is run
+      under a hypervisor, potentially improving performance significantly
+      over full virtualization.
+
+config PARAVIRT_TIME_ACCOUNTING
+    bool "Paravirtual steal time accounting"
+    select PARAVIRT
+    help
+      Select this option to enable fine granularity task steal time
+      accounting. Time spent executing other tasks in parallel with
+      the current vCPU is discounted from the vCPU power. To account for
+      that, there can be a small performance impact.
+
+      If in doubt, say N here.
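The FORCE_MAX_ZONEORDER arithmetic above works out as follows: with the default value of 11 and 4 KiB pages, the largest buddy allocation is order 10, i.e. 2^10 pages = 4 MiB. A minimal sketch (not part of the patch; demo_alloc_largest_block() is an invented name):

#include <linux/gfp.h>

static struct page *demo_alloc_largest_block(void)
{
	/*
	 * MAX_ORDER comes from FORCE_MAX_ZONEORDER; order MAX_ORDER - 1 is
	 * the largest contiguous block the buddy allocator will hand out.
	 * Free it later with __free_pages(page, MAX_ORDER - 1).
	 */
	return alloc_pages(GFP_KERNEL, MAX_ORDER - 1);
}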
+ +config XEN_DOM0 + def_bool y + depends on XEN + +config XEN + bool "Xen guest support on ARM" + depends on ARM && AEABI && OF + depends on CPU_V7 && !CPU_V6 + depends on !GENERIC_ATOMIC64 + depends on MMU + select ARCH_DMA_ADDR_T_64BIT + select ARM_PSCI + select SWIOTLB + select SWIOTLB_XEN + select PARAVIRT + help + Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. + +config STACKPROTECTOR_PER_TASK + bool "Use a unique stack canary value for each task" + depends on GCC_PLUGINS && STACKPROTECTOR && SMP && !XIP_DEFLATED_DATA + select GCC_PLUGIN_ARM_SSP_PER_TASK + default y + help + Due to the fact that GCC uses an ordinary symbol reference from + which to load the value of the stack canary, this value can only + change at reboot time on SMP systems, and all tasks running in the + kernel's address space are forced to use the same canary value for + the entire duration that the system is up. + + Enable this option to switch to a different method that uses a + different canary value for each task. + +endmenu + +menu "Boot options" + +config USE_OF + bool "Flattened Device Tree support" + select IRQ_DOMAIN + select OF + help + Include support for flattened device tree machine descriptions. + +config ATAGS + bool "Support for the traditional ATAGS boot data passing" if USE_OF + default y + help + This is the traditional way of passing data to the kernel at boot + time. If you are solely relying on the flattened device tree (or + the ARM_ATAG_DTB_COMPAT option) then you may unselect this option + to remove ATAGS support from your kernel binary. If unsure, + leave this to y. + +config DEPRECATED_PARAM_STRUCT + bool "Provide old way to pass kernel parameters" + depends on ATAGS + help + This was deprecated in 2001 and announced to live on for 5 years. + Some old boot loaders still use this way. + +# Compressed boot loader in ROM. Yes, we really want to ask about +# TEXT and BSS so we preserve their values in the config files. +config ZBOOT_ROM_TEXT + hex "Compressed ROM boot loader base address" + default 0x0 + help + The physical address at which the ROM-able zImage is to be + placed in the target. Platforms which normally make use of + ROM-able zImage formats normally set this to a suitable + value in their defconfig file. + + If ZBOOT_ROM is not enabled, this has no effect. + +config ZBOOT_ROM_BSS + hex "Compressed ROM boot loader BSS address" + default 0x0 + help + The base address of an area of read/write memory in the target + for the ROM-able zImage which must be available while the + decompressor is running. It must be large enough to hold the + entire decompressed kernel plus an additional 128 KiB. + Platforms which normally make use of ROM-able zImage formats + normally set this to a suitable value in their defconfig file. + + If ZBOOT_ROM is not enabled, this has no effect. + +config ZBOOT_ROM + bool "Compressed boot loader in ROM/flash" + depends on ZBOOT_ROM_TEXT != ZBOOT_ROM_BSS + depends on !ARM_APPENDED_DTB && !XIP_KERNEL && !AUTO_ZRELADDR + help + Say Y here if you intend to execute your compressed kernel image + (zImage) directly from ROM or flash. If unsure, say N. + +config ARM_APPENDED_DTB + bool "Use appended device tree blob to zImage (EXPERIMENTAL)" + depends on OF + help + With this option, the boot code will look for a device tree binary + (DTB) appended to zImage + (e.g. cat zImage .dtb > zImage_w_dtb). 
+ + This is meant as a backward compatibility convenience for those + systems with a bootloader that can't be upgraded to accommodate + the documented boot protocol using a device tree. + + Beware that there is very little in terms of protection against + this option being confused by leftover garbage in memory that might + look like a DTB header after a reboot if no actual DTB is appended + to zImage. Do not leave this option active in a production kernel + if you don't intend to always append a DTB. Proper passing of the + location into r2 of a bootloader provided DTB is always preferable + to this option. + +config ARM_ATAG_DTB_COMPAT + bool "Supplement the appended DTB with traditional ATAG information" + depends on ARM_APPENDED_DTB + help + Some old bootloaders can't be updated to a DTB capable one, yet + they provide ATAGs with memory configuration, the ramdisk address, + the kernel cmdline string, etc. Such information is dynamically + provided by the bootloader and can't always be stored in a static + DTB. To allow a device tree enabled kernel to be used with such + bootloaders, this option allows zImage to extract the information + from the ATAG list and store it at run time into the appended DTB. + +choice + prompt "Kernel command line type" if ARM_ATAG_DTB_COMPAT + default ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER + +config ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader instead of + the device tree bootargs property. If the boot loader doesn't provide + any, the device tree bootargs property will be used. + +config ARM_ATAG_DTB_COMPAT_CMDLINE_EXTEND + bool "Extend with bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the the device tree bootargs property. + +endchoice + +config CMDLINE + string "Default kernel command string" + default "" + help + On some architectures (e.g. CATS), there is currently no way + for the boot loader to pass arguments to the kernel. For these + architectures, you should supply some command-line options at build + time by entering them here. As a minimum, you should specify the + memory size and the root device (e.g., mem=64M root=/dev/nfs). + +choice + prompt "Kernel command line type" if CMDLINE != "" + default CMDLINE_FROM_BOOTLOADER + depends on ATAGS + +config CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader. If + the boot loader doesn't provide any, the default kernel command + string provided in CMDLINE will be used. + +config CMDLINE_EXTEND + bool "Extend bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the default kernel command string. + +config CMDLINE_FORCE + bool "Always use the default kernel command string" + help + Always use the default kernel command string, even if the boot + loader passes other arguments to the kernel. + This is useful if you cannot or don't want to change the + command-line options your boot loader passes to the kernel. +endchoice + +config XIP_KERNEL + bool "Kernel Execute-In-Place from ROM" + depends on !ARM_LPAE && !ARCH_MULTIPLATFORM + help + Execute-In-Place allows the kernel to run from non-volatile storage + directly addressable by the CPU, such as NOR flash. This saves RAM + space since the text section of the kernel is not loaded from flash + to RAM. 
Read-write sections, such as the data section and stack, + are still copied to RAM. The XIP kernel is not compressed since + it has to run directly from flash, so it will take more space to + store it. The flash address used to link the kernel object files, + and for storing it, is configuration dependent. Therefore, if you + say Y here, you must know the proper physical address where to + store the kernel image depending on your own flash memory usage. + + Also note that the make target becomes "make xipImage" rather than + "make zImage" or "make Image". The final kernel binary to put in + ROM memory will be arch/arm/boot/xipImage. + + If unsure, say N. + +config XIP_PHYS_ADDR + hex "XIP Kernel Physical Location" + depends on XIP_KERNEL + default "0x00080000" + help + This is the physical address in your flash memory the kernel will + be linked for and stored to. This address is dependent on your + own flash usage. + +config XIP_DEFLATED_DATA + bool "Store kernel .data section compressed in ROM" + depends on XIP_KERNEL + select ZLIB_INFLATE + help + Before the kernel is actually executed, its .data section has to be + copied to RAM from ROM. This option allows for storing that data + in compressed form and decompressed to RAM rather than merely being + copied, saving some precious ROM space. A possible drawback is a + slightly longer boot delay. + +config KEXEC + bool "Kexec system call (EXPERIMENTAL)" + depends on (!SMP || PM_SLEEP_SMP) + depends on MMU + select KEXEC_CORE + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. + +config ATAGS_PROC + bool "Export atags in procfs" + depends on ATAGS && KEXEC + default y + help + Should the atags used to boot the kernel be exported in an "atags" + file in procfs. Useful with kexec. + +config CRASH_DUMP + bool "Build kdump crash kernel (EXPERIMENTAL)" + help + Generate crash dump after being started by kexec. This should + be normally only set in special crash dump kernels which are + loaded in the main kernel with kexec-tools into a specially + reserved region and then later executed after a crash by + kdump/kexec. The crash dump kernel must be compiled to a + memory address not used by the main kernel + + For more details see Documentation/admin-guide/kdump/kdump.rst + +config AUTO_ZRELADDR + bool "Auto calculation of the decompressed kernel image address" + help + ZRELADDR is the physical address where the decompressed kernel + image will be placed. If AUTO_ZRELADDR is selected, the address + will be determined at run-time, either by masking the current IP + with 0xf8000000, or, if invalid, from the DTB passed in r2. + This assumes the zImage being placed in the first 128MB from + start of memory. + +config EFI_STUB + bool + +config EFI + bool "UEFI runtime support" + depends on OF && !CPU_BIG_ENDIAN && MMU && AUTO_ZRELADDR && !XIP_KERNEL + select UCS2_STRING + select EFI_PARAMS_FROM_FDT + select EFI_STUB + select EFI_GENERIC_STUB + select EFI_RUNTIME_WRAPPERS + help + This option provides support for runtime services provided + by UEFI firmware (such as non-volatile variables, realtime + clock, and platform reset). 
A UEFI stub is also provided to + allow the kernel to be booted as an EFI application. This + is only useful for kernels that may run on systems that have + UEFI firmware. + +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + + NOTE: This does *NOT* enable or encourage the use of DMI quirks, + i.e., the the practice of identifying the platform via DMI to + decide whether certain workarounds for buggy hardware and/or + firmware need to be enabled. This would require the DMI subsystem + to be enabled much earlier than we do on ARM, which is non-trivial. + +endmenu + +menu "CPU Power Management" + +source "drivers/cpufreq/Kconfig" + +source "drivers/cpuidle/Kconfig" + +endmenu + +menu "Floating point emulation" + +comment "At least one emulation must be selected" + +config FPE_NWFPE + bool "NWFPE math emulation" + depends on (!AEABI || OABI_COMPAT) && !THUMB2_KERNEL + help + Say Y to include the NWFPE floating point emulator in the kernel. + This is necessary to run most binaries. Linux does not currently + support floating point hardware so you need to say Y here even if + your machine has an FPA or floating point co-processor podule. + + You may say N here if you are going to load the Acorn FPEmulator + early in the bootup. + +config FPE_NWFPE_XP + bool "Support extended precision" + depends on FPE_NWFPE + help + Say Y to include 80-bit support in the kernel floating-point + emulator. Otherwise, only 32 and 64-bit support is compiled in. + Note that gcc does not generate 80-bit operations by default, + so in most cases this option only enlarges the size of the + floating point emulator without any good reason. + + You almost surely want to say N here. + +config FPE_FASTFPE + bool "FastFPE math emulation (EXPERIMENTAL)" + depends on (!AEABI || OABI_COMPAT) && !CPU_32v3 + help + Say Y here to include the FAST floating point emulator in the kernel. + This is an experimental much faster emulator which now also has full + precision for the mantissa. It does not support any exceptions. + It is very simple, and approximately 3-6 times faster than NWFPE. + + It should be sufficient for most programs. It may be not suitable + for scientific calculations, but you have to check this for yourself. + If you do not feel you need a faster FP emulation you should better + choose NWFPE. + +config VFP + bool "VFP-format floating point maths" + depends on CPU_V6 || CPU_V6K || CPU_ARM926T || CPU_V7 || CPU_FEROCEON + help + Say Y to include VFP support code in the kernel. This is needed + if your hardware includes a VFP unit. + + Please see for + release notes and additional status information. + + Say N if your target does not have VFP hardware. + +config VFPv3 + bool + depends on VFP + default y if CPU_V7 + +config NEON + bool "Advanced SIMD (NEON) Extension support" + depends on VFPv3 && CPU_V7 + help + Say Y to include support code for NEON, the ARMv7 Advanced SIMD + Extension. + +config KERNEL_MODE_NEON + bool "Support for NEON in kernel mode" + depends on NEON && AEABI + help + Say Y to include support for NEON in kernel mode. 
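To illustrate the KERNEL_MODE_NEON entry above (sketch only, not part of the patch): NEON use in kernel code has to be bracketed by kernel_neon_begin()/kernel_neon_end() so the user-space VFP/NEON register state is preserved, and it must not run in interrupt context. demo_fill32() and do_neon_fill32() are invented names, the latter standing in for any NEON-accelerated routine.

#include <linux/types.h>
#include <linux/hardirq.h>
#include <asm/neon.h>

void do_neon_fill32(u32 *dst, u32 val, int count);	/* hypothetical NEON helper */

static void demo_fill32(u32 *dst, u32 val, int count)
{
	if (in_interrupt()) {		/* NEON is not usable in this context */
		while (count--)
			*dst++ = val;
		return;
	}

	kernel_neon_begin();		/* save the user VFP/NEON state */
	do_neon_fill32(dst, val, count);
	kernel_neon_end();		/* restore it */
}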
+ +endmenu + +menu "Power management options" + +source "kernel/power/Kconfig" + +config ARCH_SUSPEND_POSSIBLE + depends on CPU_ARM920T || CPU_ARM926T || CPU_FEROCEON || CPU_SA1100 || \ + CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M || CPU_XSC3 || CPU_XSCALE || CPU_MOHAWK + def_bool y + +config ARM_CPU_SUSPEND + def_bool PM_SLEEP || BL_SWITCHER || ARM_PSCI_FW + depends on ARCH_SUSPEND_POSSIBLE + +config ARCH_HIBERNATION_POSSIBLE + bool + depends on MMU + default y if ARCH_SUSPEND_POSSIBLE + +endmenu + +if CRYPTO +source "arch/arm/crypto/Kconfig" +endif + +source "arch/arm/Kconfig.assembler" diff -ruN linux-org/arch/arm/kernel/asm-offsets.c linux/arch/arm/kernel/asm-offsets.c --- linux-org/arch/arm/kernel/asm-offsets.c 2022-03-25 09:55:36.309474516 +0100 +++ linux/arch/arm/kernel/asm-offsets.c 2022-03-25 10:15:23.341003056 +0100 @@ -64,6 +64,9 @@ #endif BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); +#ifdef CONFIG_IPIPE + DEFINE(TI_IPIPE, offsetof(struct thread_info, ipipe_flags)); +#endif DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); diff -ruN linux-org/arch/arm/kernel/devtree.c linux/arch/arm/kernel/devtree.c --- linux-org/arch/arm/kernel/devtree.c 2022-03-25 09:55:36.313474501 +0100 +++ linux/arch/arm/kernel/devtree.c 2022-03-25 10:15:23.341003056 +0100 @@ -188,6 +188,8 @@ cpu_logical_map(i) = tmp_map[i]; pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i)); } + + smp_build_cpu_revmap(); } bool arch_match_cpu_phys_id(int cpu, u64 phys_id) diff -ruN linux-org/arch/arm/kernel/entry-armv.S linux/arch/arm/kernel/entry-armv.S --- linux-org/arch/arm/kernel/entry-armv.S 2022-03-25 09:55:36.313474501 +0100 +++ linux/arch/arm/kernel/entry-armv.S 2022-03-25 10:15:23.341003056 +0100 @@ -4,6 +4,7 @@ * Copyright (C) 1996,1997,1998 Russell King. * ARM700 fix by Matthew Godbolt (linux-user@willothewisp.demon.co.uk) * nommu support by Hyok S. Choi (hyok.choi@samsung.com) + * Copyright (C) 2005 Stelian Pop. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -48,6 +49,10 @@ arch_irq_handler_default #endif 9997: +#ifdef CONFIG_IPIPE + bl __ipipe_check_root_interruptible + cmp r0, #1 +#endif /* CONFIG_IPIPE */ .endm .macro pabt_helper @@ -200,6 +205,14 @@ #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + mov r0, #1 /* IPIPE_TRACE_BEGIN */ + mov r3, #0x90000000 + ldr r2, [sp, #S_PC] + mov r1, pc + bl ipipe_trace_asm + ldmia r7, {r2 - r6} +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ .endif .endm @@ -217,6 +230,9 @@ __irq_svc: svc_entry irq_handler +#ifdef CONFIG_IPIPE + bne __ipipe_fast_svc_irq_exit +#endif #ifdef CONFIG_PREEMPT ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -227,6 +243,9 @@ blne svc_preempt #endif +#ifdef CONFIG_IPIPE +__ipipe_fast_svc_irq_exit: +#endif svc_exit r5, irq = 1 @ return from exception UNWIND(.fnend ) ENDPROC(__irq_svc) @@ -236,12 +255,16 @@ #ifdef CONFIG_PREEMPT svc_preempt: mov r8, lr +#ifdef CONFIG_IPIPE +1: bl __ipipe_preempt_schedule_irq @ irq en/disable is done inside +#else /* CONFIG_IPIPE */ 1: bl preempt_schedule_irq @ irq en/disable is done inside +#endif /* CONFIG_IPIPE */ ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED reteq r8 @ go again b 1b -#endif +#endif /* CONFIG_PREEMPT */ __und_fault: @ Correct the PC such that it is pointing at the instruction @@ -266,6 +289,14 @@ #else svc_entry #endif + +#ifdef CONFIG_IPIPE + mov r0, #7 @ r0 = IPIPE_TRAP_UNDEFINSTR + mov r1, sp @ r1 = ®s + bl __ipipe_notify_trap @ branch to trap handler + cmp r0, #0 + bne __und_svc_finish +#endif /* CONFIG_IPIPE */ @ @ call emulation code, which returns using r9 if it has emulated @ the instruction, or the more conventional lr if we are to treat @@ -385,6 +416,15 @@ sub sp, sp, #PT_REGS_SIZE ARM( stmib sp, {r1 - r12} ) THUMB( stmia sp, {r0 - r12} ) +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + mov r4, r0 + mov r0, #1 /* IPIPE_TRACE_BEGIN */ + mov r3, #0x90000000 + ldr r2, [r4, #4] /* lr_ */ + mov r1, pc + bl ipipe_trace_asm + mov r0, r4 +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ATRAP( mrc p15, 0, r7, c1, c0, 0) ATRAP( ldr r8, .LCcralign) @@ -462,6 +502,10 @@ usr_entry kuser_cmpxchg_check irq_handler +#ifdef CONFIG_IPIPE + THUMB( it ne) + bne __ipipe_ret_to_user_irqs_disabled +#endif /* CONFIG_IPIPE */ get_thread_info tsk mov why, #0 b ret_to_user_from_irq @@ -474,6 +518,15 @@ __und_usr: usr_entry uaccess=0 +#ifdef CONFIG_IPIPE + mov r0, #7 @ r0 = IPIPE_TRAP_UNDEFINSTR + mov r1, sp @ r1 = ®s + bl __ipipe_notify_trap @ branch to trap handler + cmp r0, #0 + bne ret_from_exception + uaccess_enable ip +#endif /* CONFIG_IPIPE */ + mov r2, r4 mov r3, r5 @@ -756,7 +809,16 @@ ENTRY(ret_from_exception) UNWIND(.fnstart ) UNWIND(.cantunwind ) +#ifdef CONFIG_IPIPE + disable_irq get_thread_info tsk + ldr r0, [tsk, #TI_IPIPE] + tst r0, #_TIP_HEAD + THUMB( it ne) + bne __ipipe_ret_to_user_irqs_disabled @ Fast exit path over non-root domains +#else /* !CONFIG_IPIPE */ + get_thread_info tsk +#endif /* !CONFIG_IPIPE */ mov why, #0 b ret_to_user UNWIND(.fnend ) @@ -810,7 +872,11 @@ add r4, r2, #TI_CPU_SAVE ldr r0, =thread_notify_head mov r1, #THREAD_NOTIFY_SWITCH +#ifdef CONFIG_IPIPE + bl __ipipe_switch_to_notifier_call_chain +#else /* CONFIG_IPIPE */ bl atomic_notifier_call_chain +#endif /* CONFIG_IPIPE */ #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) str r7, [r8] #endif @@ -845,6 +911,50 @@ #endif .endm +#ifdef CONFIG_IPIPE +/* 
+ I-pipe tsc area, here we store data shared with user-space for + tsc-emulation. If CONFIG_IPIPE_ARM_KUSER_TSC is enabled + __ipipe_kuser_get_tsc will be overwritten with the real TSC + emulation code. +*/ + .globl __ipipe_tsc_area + .equ __ipipe_tsc_area, CONFIG_VECTORS_BASE + 0x1000 + __ipipe_tsc_area_start - __kuser_helper_end + +#ifdef CONFIG_IPIPE_ARM_KUSER_TSC + .globl __ipipe_tsc_addr + .equ __ipipe_tsc_addr, CONFIG_VECTORS_BASE + 0x1000 + .LCcntr_addr - __kuser_helper_end + + .globl __ipipe_tsc_get + .equ __ipipe_tsc_get, CONFIG_VECTORS_BASE + 0x1000 + __ipipe_kuser_get_tsc - __kuser_helper_end +#endif + + .align 5 + .globl __ipipe_tsc_area_start +__ipipe_tsc_area_start: + .rep 3 + .word 0 + .endr + +#ifdef CONFIG_IPIPE_ARM_KUSER_TSC + .rep 4 + .word 0 + .endr +.LCcntr_addr: + .word 0 + + .align 5 +__ipipe_kuser_get_tsc: + nop + mov r0, #0 + mov r1, #0 + usr_ret lr + .rep 20 + .word 0 + .endr +#endif +#endif + .macro kuser_pad, sym, size .if (. - \sym) & 3 .rept 4 - (. - \sym) & 3 diff -ruN linux-org/arch/arm/kernel/entry-common.S linux/arch/arm/kernel/entry-common.S --- linux-org/arch/arm/kernel/entry-common.S 2022-03-25 09:55:36.313474501 +0100 +++ linux/arch/arm/kernel/entry-common.S 2022-03-25 10:15:23.341003056 +0100 @@ -2,6 +2,7 @@ * linux/arch/arm/kernel/entry-common.S * * Copyright (C) 2000 Russell King + * Copyright (C) 2005 Stelian Pop. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -37,6 +38,35 @@ #define TRACE(x...) #endif +#ifdef CONFIG_IPIPE + +.macro ipipe_oabi_save_sysnr, tmp +#ifdef CONFIG_OABI_COMPAT + ldr \tmp, [sp, #S_R7 + S_OFF] + stmdb sp!, {\tmp} + ldr \tmp, =sys_oabi_call_table + cmp \tmp, tbl + moveq \tmp, scno + addeq \tmp, #__NR_SYSCALL_BASE + streq \tmp, [sp, #S_R7 + S_OFF + 4] @ head domain expects sycall number in r7 +#elif !defined(CONFIG_AEABI) + ldr \tmp, [sp, #S_R7 + S_OFF] + stmdb sp!, {\tmp} + mov \tmp, scno + add \tmp, #__NR_SYSCALL_BASE + str \tmp, [sp, #S_R7 + S_OFF + 4] +#endif +.endm + +.macro ipipe_oabi_restore_sysnr, tmp +#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI) + ldmia sp!, {\tmp} + str \tmp, [sp, #S_R7 + S_OFF] +#endif +.endm + +#endif /* CONFIG_IPIPE */ + .align 5 #if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING)) /* @@ -56,11 +86,12 @@ tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK bne fast_work_pending - /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr restore_user_regs fast = 1, offset = S_OFF + + UNWIND(.fnend ) ENDPROC(ret_fast_syscall) @@ -122,12 +153,7 @@ bne slow_work_pending no_work_pending: asm_trace_hardirqs_on save = 0 - - /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr - ct_user_enter save = 0 - - restore_user_regs fast = 0, offset = 0 + slow_restore_user_regs ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) @@ -135,6 +161,7 @@ * This is how we return from a fork. 
*/ ENTRY(ret_from_fork) + enable_irq_cond bl schedule_tail cmp r5, #0 movne r0, r4 @@ -144,6 +171,14 @@ b ret_slow_syscall ENDPROC(ret_from_fork) +#ifdef CONFIG_IPIPE +__ipipe_ret_to_user: + disable_irq @ disable interrupts +ENTRY(__ipipe_ret_to_user_irqs_disabled) + slow_restore_user_regs +ENDPROC(__ipipe_ret_to_user_irqs_disabled) +#endif + /*============================================================================= * SWI handler *----------------------------------------------------------------------------- @@ -167,6 +202,16 @@ str r0, [sp, #S_OLD_R0] @ Save OLD_R0 #endif zero_fp +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + mov r4, lr + mov r0, #1 /* IPIPE_TRACE_BEGIN */ + mov r3, #0x90000000 + sub r2, lr, #4 /* calling PC */ + mov r1, pc + bl ipipe_trace_asm + mov lr, r4 + ldm sp, {r0 - r4} +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ alignment_trap r10, ip, __cr_alignment asm_trace_hardirqs_on save=0 enable_irq_notrace @@ -235,6 +280,55 @@ TRACE( ldmia sp, {r0 - r3} ) local_restart: +#ifdef CONFIG_IPIPE + ldr r10, [tsk, #TI_IPIPE] + ldr r0, =(__ARM_NR_ipipe - __NR_SYSCALL_BASE) + cmp scno, r0 + bne slow_path + tst r10, #_TIP_HEAD + beq slow_path + mov r0, sp + ipipe_oabi_save_sysnr r10 @ caution: affects sp + bl ipipe_fastcall_hook @ __IPIPE_SYSCALL_E is assumed + ipipe_oabi_restore_sysnr r10 + cmp r0, #0 + blt no_fastcall + get_thread_info tsk + ldr r10, [tsk, #TI_IPIPE] + tst r10, #_TIP_HEAD + bne fastcall_exit_check @ check for MAYDAY + bl __ipipe_root_sync + b ret_slow_syscall +fastcall_exit_check: + tst r10, #_TIP_MAYDAY + beq __ipipe_ret_to_user + mov r0, sp + bl __ipipe_call_mayday + b __ipipe_ret_to_user +no_fastcall: + get_thread_info tsk + ldr r0, =(__ARM_NR_ipipe - __NR_SYSCALL_BASE) + ldr r10, [tsk, #TI_IPIPE] +slow_path: + tst r10, #_TIP_NOTIFY + bne pipeline_syscall + cmp scno, r0 + bne root_syscall +pipeline_syscall: + mov r0, sp + ipipe_oabi_save_sysnr r10 @ caution: affects sp + bl __ipipe_notify_syscall + ipipe_oabi_restore_sysnr r10 + get_thread_info tsk + ldr r10, [tsk, #TI_IPIPE] + tst r10, #_TIP_HEAD + bne __ipipe_ret_to_user + cmp r0, #0 + bgt ret_slow_syscall +root_syscall: + ldmia sp, { r0 - r3 } +#endif /* CONFIG_IPIPE */ + ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing stmdb sp!, {r4, r5} @ push fifth and sixth args @@ -459,3 +553,27 @@ #endif +#if defined(CONFIG_FRAME_POINTER) && (CONFIG_IPIPE_TRACE) + + .text + .align 0 + .type arm_return_addr %function + .global arm_return_addr + +arm_return_addr: + mov ip, r0 + mov r0, fp +3: + cmp r0, #0 + beq 1f @ frame list hit end, bail + cmp ip, #0 + beq 2f @ reached desired frame + ldr r0, [r0, #-12] @ else continue, get next fp + sub ip, ip, #1 + b 3b +2: + ldr r0, [r0, #-4] @ get target return address +1: + mov pc, lr + +#endif diff -ruN linux-org/arch/arm/kernel/entry-header.S linux/arch/arm/kernel/entry-header.S --- linux-org/arch/arm/kernel/entry-header.S 2022-03-25 09:55:36.313474501 +0100 +++ linux/arch/arm/kernel/entry-header.S 2022-03-25 10:15:23.341003056 +0100 @@ -24,7 +24,7 @@ @ #define S_OFF 8 -/* +/* * The SWI code relies on the fact that R0 is at the bottom of the stack * (due to slow/fast restore user regs). */ @@ -201,6 +201,9 @@ .macro svc_exit, rpsr, irq = 0 .if \irq != 0 @ IRQs already off +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL + bl __ipipe_bugon_irqs_enabled +#endif #ifdef CONFIG_TRACE_IRQFLAGS @ The parent context IRQs must have been enabled to get here in @ the first place, so there's no point checking the PSR I bit. 
@@ -222,6 +225,14 @@ #ifndef CONFIG_THUMB2_KERNEL @ ARM mode SVC restore + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + mov r0, #2 /* IPIPE_TRACE_END */ + mov r3, #0x90000000 + ldr r2, [sp, #S_PC] + mov r1, pc + bl ipipe_trace_asm +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ msr spsr_cxsf, \rpsr #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @ We must avoid clrex due to Cortex-A15 erratum #830321 @@ -297,6 +308,22 @@ uaccess_enable r1, isb=0 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL + bl __ipipe_bugon_irqs_enabled +#endif +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + .if \fast + mov r4, r0 + .endif + mov r0, #2 /* IPIPE_TRACE_END */ + mov r3, #0x90000000 + ldr r2, [sp, #\offset + S_PC] + mov r1, pc + bl ipipe_trace_asm + .if \fast + mov r0, r4 + .endif +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc @@ -378,6 +405,13 @@ #endif .endm + .macro slow_restore_user_regs + /* perform architecture specific actions before user return */ + arch_ret_to_user r1, lr + ct_user_enter save = 0 + restore_user_regs fast = 0, offset = 0 + .endm + /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. diff -ruN linux-org/arch/arm/kernel/ipipe.c linux/arch/arm/kernel/ipipe.c --- linux-org/arch/arm/kernel/ipipe.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/kernel/ipipe.c 2022-03-25 10:15:23.341003056 +0100 @@ -0,0 +1,490 @@ +/* -*- linux-c -*- + * linux/arch/arm/kernel/ipipe.c + * + * Copyright (C) 2002-2005 Philippe Gerum. + * Copyright (C) 2004 Wolfgang Grandegger (Adeos/arm port over 2.4). + * Copyright (C) 2005 Heikki Lindholm (PowerPC 970 fixes). + * Copyright (C) 2005 Stelian Pop. + * Copyright (C) 2006-2008 Gilles Chanteperdrix. + * Copyright (C) 2010 Philippe Gerum (SMP port). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-dependent I-PIPE support for ARM. 
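+ *
+ * This file provides the ARM-specific pipeline glue: the root domain
+ * stall/test helpers, the virtual NMI service IPI, the low-level IRQ
+ * entry point called from assembly (__ipipe_grab_irq), and the mm
+ * switching helpers used by the context switch code.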
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void __ipipe_do_IRQ(unsigned irq, void *cookie); + +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL +void (*__ipipe_mach_hrtimer_debug)(unsigned irq); +#endif + +void ipipe_stall_root(void) +{ + unsigned long flags; + + ipipe_root_only(); + flags = hard_smp_local_irq_save(); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); + hard_smp_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ipipe_stall_root); + +unsigned long ipipe_test_and_stall_root(void) +{ + unsigned long flags; + int x; + + ipipe_root_only(); + flags = hard_smp_local_irq_save(); + x = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); + hard_smp_local_irq_restore(flags); + + return x; +} +EXPORT_SYMBOL_GPL(ipipe_test_and_stall_root); + +unsigned long ipipe_test_root(void) +{ + unsigned long flags; + int x; + + flags = hard_smp_local_irq_save(); + x = test_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); + hard_smp_local_irq_restore(flags); + + return x; +} +EXPORT_SYMBOL_GPL(ipipe_test_root); + +#ifdef CONFIG_SMP + +struct __ipipe_vnmidata { + void (*fn)(void *); + void *arg; + cpumask_t cpumask; +}; + +static struct __ipipe_vnmislot { + ipipe_spinlock_t lock; + struct __ipipe_vnmidata *data; + ipipe_rwlock_t data_lock; +} __ipipe_vnmi __cacheline_aligned_in_smp = { + .lock = IPIPE_SPIN_LOCK_UNLOCKED, + .data = NULL, + .data_lock = IPIPE_RW_LOCK_UNLOCKED, +}; + +void __ipipe_early_core_setup(void) +{ + __ipipe_mach_init_platform(); +} + +void __ipipe_do_vnmi(unsigned int irq, void *cookie) +{ + int cpu = ipipe_processor_id(); + struct __ipipe_vnmidata *data; + + read_lock(&__ipipe_vnmi.data_lock); + + data = __ipipe_vnmi.data; + if (likely(data && cpumask_test_cpu(cpu, &data->cpumask))) { + data->fn(data->arg); + cpumask_clear_cpu(cpu, &data->cpumask); + } + + read_unlock(&__ipipe_vnmi.data_lock); +} + +static inline void +hook_internal_ipi(struct ipipe_domain *ipd, int virq, + void (*handler)(unsigned int irq, void *cookie)) +{ + ipd->irqs[virq].ackfn = NULL; + ipd->irqs[virq].handler = handler; + ipd->irqs[virq].cookie = NULL; + /* Immediately handle in the current domain but *never* pass */ + ipd->irqs[virq].control = IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK; +} + +void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) +{ + __ipipe_ipis_alloc(); + hook_internal_ipi(ipd, IPIPE_CRITICAL_IPI, __ipipe_do_critical_sync); + hook_internal_ipi(ipd, IPIPE_SERVICE_VNMI, __ipipe_do_vnmi); +} + +void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) +{ + if (ipipe_virtual_irq_p(irq) || + irq_get_chip(irq)->irq_set_affinity == NULL) + return; + + cpumask_and(&cpumask, &cpumask, cpu_online_mask); + if (WARN_ON_ONCE(cpumask_empty(&cpumask))) + return; + + irq_get_chip(irq)->irq_set_affinity(irq_get_irq_data(irq), &cpumask, true); +} +EXPORT_SYMBOL_GPL(ipipe_set_irq_affinity); + +void __ipipe_send_vnmi(void (*fn)(void *), cpumask_t cpumask, void *arg) +{ + struct __ipipe_vnmidata data; + unsigned long flags; + int cpu; + + data.fn = fn; + data.arg = arg; + data.cpumask = cpumask; + + while (!spin_trylock_irqsave(&__ipipe_vnmi.lock, flags)) { + if (hard_irqs_disabled()) + __ipipe_do_vnmi(IPIPE_SERVICE_VNMI, NULL); + cpu_relax(); + } + + cpu = ipipe_processor_id(); + cpumask_clear_cpu(cpu, &data.cpumask); + if (cpumask_empty(&data.cpumask)) { + 
spin_unlock_irqrestore(&__ipipe_vnmi.lock, flags); + return; + } + + write_lock(&__ipipe_vnmi.data_lock); + __ipipe_vnmi.data = &data; + write_unlock(&__ipipe_vnmi.data_lock); + + ipipe_send_ipi(IPIPE_SERVICE_VNMI, data.cpumask); + while (!cpumask_empty(&data.cpumask)) + cpu_relax(); + + write_lock(&__ipipe_vnmi.data_lock); + __ipipe_vnmi.data = NULL; + write_unlock(&__ipipe_vnmi.data_lock); + + spin_unlock_irqrestore(&__ipipe_vnmi.lock, flags); +} +EXPORT_SYMBOL_GPL(__ipipe_send_vnmi); +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_SMP_ON_UP +struct static_key __ipipe_smp_key = STATIC_KEY_INIT_TRUE; +EXPORT_SYMBOL_GPL(__ipipe_smp_key); + +unsigned notrace __ipipe_processor_id(void) +{ + return raw_smp_processor_id(); +} +EXPORT_SYMBOL_GPL(__ipipe_processor_id); + +static int ipipe_disable_smp(void) +{ + if (num_online_cpus() == 1) { + unsigned long flags; + + printk("I-pipe: disabling SMP code\n"); + + flags = hard_local_irq_save(); + static_key_slow_dec(&__ipipe_smp_key); + hard_local_irq_restore(flags); + } + return 0; +} +arch_initcall(ipipe_disable_smp); + +extern unsigned int smp_on_up; +EXPORT_SYMBOL_GPL(smp_on_up); +#endif /* SMP_ON_UP */ + +__weak void __ipipe_mach_get_tscinfo(struct __ipipe_tscinfo *info) +{ + info->type = IPIPE_TSC_TYPE_NONE; +} + +int ipipe_get_sysinfo(struct ipipe_sysinfo *info) +{ + info->sys_nr_cpus = num_online_cpus(); + info->sys_cpu_freq = __ipipe_hrclock_freq; + info->sys_hrtimer_irq = per_cpu(ipipe_percpu.hrtimer_irq, 0); + info->sys_hrtimer_freq = __ipipe_hrtimer_freq; + info->sys_hrclock_freq = __ipipe_hrclock_freq; + __ipipe_mach_get_tscinfo(&info->arch.tsc); + + return 0; +} +EXPORT_SYMBOL_GPL(ipipe_get_sysinfo); + +/* + * __ipipe_enable_pipeline() -- We are running on the boot CPU, hw + * interrupts are off, and secondary CPUs are still lost in space. + */ +void __ipipe_enable_pipeline(void) +{ + unsigned long flags; + unsigned int irq; + + flags = ipipe_critical_enter(NULL); + + /* virtualize all interrupts from the root domain. */ + for (irq = 0; irq < IPIPE_NR_ROOT_IRQS; irq++) + ipipe_request_irq(ipipe_root_domain, + irq, + (ipipe_irq_handler_t)__ipipe_do_IRQ, + NULL, NULL); + +#ifdef CONFIG_SMP + __ipipe_ipis_request(); +#endif /* CONFIG_SMP */ + + ipipe_critical_exit(flags); +} + +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL +unsigned asmlinkage __ipipe_bugon_irqs_enabled(unsigned x) +{ + BUG_ON(!hard_irqs_disabled()); + return x; /* Preserve r0 */ +} +#endif + +asmlinkage int __ipipe_check_root_interruptible(void) +{ + return __ipipe_root_p && !irqs_disabled(); +} + +__kprobes int +__ipipe_switch_to_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = atomic_notifier_call_chain(nh, val, v); + __ipipe_restore_root_nosync(flags); + + return ret; +} + +void __ipipe_exit_irq(struct pt_regs *regs) +{ + /* + * Testing for user_regs() eliminates foreign stack contexts, + * including from legacy domains which did not set the foreign + * stack bit (foreign stacks are always kernel-based). + */ + if (user_mode(regs) && + ipipe_test_thread_flag(TIP_MAYDAY)) { + /* + * MAYDAY is never raised under normal circumstances, + * so prefer test then maybe clear over + * test_and_clear. 
+ */ + ipipe_clear_thread_flag(TIP_MAYDAY); + __ipipe_notify_trap(IPIPE_TRAP_MAYDAY, regs); + } +} + +void printascii(const char *s); +/* hw irqs off */ +asmlinkage void __exception __ipipe_grab_irq(int irq, struct pt_regs *regs) +{ + struct ipipe_percpu_data *p = __ipipe_raw_cpu_ptr(&ipipe_percpu); + +#if 0 + if (irq == 16) + printascii("*"); + else + printascii("#"); +#endif + + ipipe_trace_irq_entry(irq); + + if (p->hrtimer_irq == -1) + goto copy_regs; + + if (irq == p->hrtimer_irq) { + /* + * Given our deferred dispatching model for regular IRQs, we + * only record CPU regs for the last timer interrupt, so that + * the timer handler charges CPU times properly. It is assumed + * that other interrupt handlers don't actually care for such + * information. + */ +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL + if (__ipipe_mach_hrtimer_debug) + __ipipe_mach_hrtimer_debug(irq); +#endif /* CONFIG_IPIPE_DEBUG_INTERNAL */ + copy_regs: + p->tick_regs.ARM_cpsr = + (p->curr == &p->root + ? regs->ARM_cpsr + : regs->ARM_cpsr | PSR_I_BIT); + p->tick_regs.ARM_pc = regs->ARM_pc; + } + + __ipipe_dispatch_irq(irq, 0); + + ipipe_trace_irq_exit(irq); + + __ipipe_exit_irq(regs); +} + +static void __ipipe_do_IRQ(unsigned irq, void *cookie) +{ + handle_IRQ(irq, raw_cpu_ptr(&ipipe_percpu.tick_regs)); +} + +#ifdef CONFIG_MMU +void __switch_mm_inner(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + struct mm_struct ** const active_mm = + raw_cpu_ptr(&ipipe_percpu.active_mm); +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + struct thread_info *const tip = current_thread_info(); + prev = *active_mm; + clear_bit(TIF_MMSWITCH_INT, &tip->flags); + barrier(); + *active_mm = NULL; + barrier(); + for (;;) { + unsigned long flags; +#endif /* CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + + int rc __maybe_unused = __do_switch_mm(prev, next, tsk, true); + +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + /* + * Reading thread_info flags and setting active_mm + * must be done atomically. + */ + flags = hard_local_irq_save(); + if (__test_and_clear_bit(TIF_MMSWITCH_INT, &tip->flags) == 0) { + if (rc < 0) + *active_mm = prev; + else + *active_mm = next; + hard_local_irq_restore(flags); + return; + } + hard_local_irq_restore(flags); + + if (rc < 0) + /* + * We were interrupted by head domain, which + * may have changed the mm context, mm context + * is now unknown, but will be switched in + * deferred_switch_mm + */ + return; + + prev = NULL; + } +#else + if (rc < 0) + *active_mm = prev; + else + *active_mm = next; +#endif /* !IPIPE_WANT_PREEMPTIBLE_SWITCH */ +} + +#ifdef finish_arch_post_lock_switch +void deferred_switch_mm(struct mm_struct *next) +{ + struct mm_struct ** const active_mm = + raw_cpu_ptr(&ipipe_percpu.active_mm); + struct mm_struct *prev = *active_mm; +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + struct thread_info *const tip = current_thread_info(); + clear_bit(TIF_MMSWITCH_INT, &tip->flags); + barrier(); + *active_mm = NULL; + barrier(); + for (;;) { + unsigned long flags; +#endif /* CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + + __do_switch_mm(prev, next, NULL, false); + +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + /* + * Reading thread_info flags and setting active_mm + * must be done atomically. 
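+ * If the switch was interrupted and another mm switch was
+ * requested in the meantime, TIF_MMSWITCH_INT is found set
+ * below and the low-level switch is simply retried by the
+ * enclosing loop; otherwise the new active_mm is published
+ * before returning.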
+ */ + flags = hard_local_irq_save(); + if (__test_and_clear_bit(TIF_MMSWITCH_INT, &tip->flags) == 0) { + *active_mm = next; + hard_local_irq_restore(flags); + return; + } + hard_local_irq_restore(flags); + prev = NULL; + } +#else + *active_mm = next; +#endif /* CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ +} +#endif +#endif /* CONFIG_MMU */ + +EXPORT_SYMBOL_GPL(do_munmap); +EXPORT_SYMBOL_GPL(show_stack); +EXPORT_SYMBOL_GPL(init_mm); +#ifndef MULTI_CPU +EXPORT_SYMBOL_GPL(cpu_do_switch_mm); +#endif +EXPORT_SYMBOL_GPL(__check_vmalloc_seq); +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +EXPORT_SYMBOL_GPL(tasklist_lock); +#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ + +#ifndef CONFIG_SPARSE_IRQ +EXPORT_SYMBOL_GPL(irq_desc); +#endif + +#ifdef CONFIG_CPU_HAS_ASID +EXPORT_SYMBOL_GPL(check_and_switch_context); +#endif /* CONFIG_CPU_HAS_ASID */ + +EXPORT_SYMBOL_GPL(cpu_architecture); diff -ruN linux-org/arch/arm/kernel/ipipe_tsc_asm.S linux/arch/arm/kernel/ipipe_tsc_asm.S --- linux-org/arch/arm/kernel/ipipe_tsc_asm.S 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/kernel/ipipe_tsc_asm.S 2022-03-25 10:15:23.341003056 +0100 @@ -0,0 +1,298 @@ +#include +#include +#include + + .macro usr_ret, reg +#ifdef CONFIG_ARM_THUMB + bx \reg +#else + mov pc, \reg +#endif + .endm + + .macro usr_reteq, reg +#ifdef CONFIG_ARM_THUMB + bxeq \reg +#else + moveq pc, \reg +#endif + .endm + + .macro myldrd, rd1, rd2, rtmp, label +#if __LINUX_ARM_ARCH__ < 5 + adr \rtmp, \label + ldm \rtmp, { \rd1, \rd2 } +#else + ldrd \rd1, \label +#endif + .endm + +/* + We use the same mechanism as Linux user helpers to store + variables and functions related to TSC emulation, so that they + can also be used in user-space. + + The function ipipe_tsc_register will copy the proper + implementation to the vectors page. We repeat the data area so + that the PC relative operations are computed correctly.
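+
+ Only one of the __ipipe_freerunning_* / __ipipe_decrementer_16
+ helpers below is used on a given platform: __ipipe_tsc_register()
+ picks it according to the struct __ipipe_tscinfo handed over by the
+ platform code and copies its code right after the shared data words
+ in the vectors page.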
+*/ + + .section .init.text, "ax", %progbits + THUMB( .arm ) + + .align 5 + .rep 7 + .word 0 + .endr +.LCfr64_cntr_addr: + .word 0 + + .align 5 + .globl __ipipe_freerunning_64 +__ipipe_freerunning_64: + ldr r0, .LCfr64_cntr_addr +/* User-space entry-point: r0 is the hardware counter virtual address */ + mov r2, r0 +#ifndef CONFIG_CPU_BIG_ENDIAN +/* Little endian */ + ldr r1, [r2, #4] +1: ldr r0, [r2] + ldr r3, [r2, #4] + cmp r3, r1 + usr_reteq lr + mov r1, r3 + b 1b +#else /* Big endian */ + ldr r0, [r2] +1: ldr r1, [r2, #4] + ldr r3, [r2] + cmp r3, r0 + usr_reteq lr + mov r0, r3 + b 1b +#endif /* Big endian */ + + .align 5 +.LCfr32_last_tsc: + .rep 7 + .word 0 + .endr +.LCfr32_cntr_addr: + .word 0 + + .align 5 + .globl __ipipe_freerunning_32 +__ipipe_freerunning_32: + ldr r0, .LCfr32_cntr_addr +/* User-space entry-point: r0 is the hardware counter virtual address */ + myldrd r2, r3, r1, .LCfr32_last_tsc +#ifndef CONFIG_CPU_BIG_ENDIAN +/* Little endian */ + ldr r0, [r0] + cmp r2, r0 + adc r1, r3, #0 +#else /* Big endian */ + ldr r1, [r0] + cmp r3, r1 + adc r0, r2, #0 +#endif /* Big endian */ + usr_ret lr + + .align 5 +.LCfrcd32_last_tsc: + .rep 7 + .word 0 + .endr +.LCfrcd32_cntr_addr: + .word 0 + + .align 5 + .globl __ipipe_freerunning_countdown_32 +__ipipe_freerunning_countdown_32: + ldr r0, .LCfrcd32_cntr_addr +/* User-space entry-point: r0 is the hardware counter virtual address */ + myldrd r2, r3, r1, .LCfrcd32_last_tsc +#ifndef CONFIG_CPU_BIG_ENDIAN +/* Little endian */ + ldr r0, [r0] + mvn r0, r0 + cmp r2, r0 + adc r1, r3, #0 +#else /* Big endian */ + ldr r1, [r0] + mvn r1, r1 + cmp r3, r1 + adc r0, r2, #0 +#endif /* Big endian */ + usr_ret lr + + .align 5 +.LCfr16_last_tsc: + .rep 7 + .word 0 + .endr +.LCfr16_cntr_addr: + .word 0 + + .align 5 + .globl __ipipe_freerunning_16 +__ipipe_freerunning_16: + ldr r0, .LCfr16_cntr_addr +/* User-space entry-point: r0 is the hardware counter virtual address */ +1: myldrd r2, r3, r1, .LCfr16_last_tsc + ldrh ip, [r0] +#ifndef CONFIG_CPU_BIG_ENDIAN +/* Little endian */ + ldr r1, .LCfr16_last_tsc + cmp r1, r2 + mov r1, r2, lsr #16 + bne 1b + orr r0, ip, r1, lsl #16 + cmp r2, r0 + addhis r0, r0, #0x10000 + adc r1, r3, #0 +#else /* Big endian */ + ldr r1, .LCfr16_last_tsc + 4 + cmp r1, r3 + mov r1, r3, lsr #16 + bne 1b + orr r1, ip, r1, lsl #16 + cmp r3, r1 + addhis r1, r1, #0x10000 + adc r0, r2, #0 +#endif /* Big endian */ + usr_ret lr + + .align 5 +.LCfrcd16_last_tsc: + .rep 7 + .word 0 + .endr +.LCfrcd16_cntr_addr: + .word 0 + + .align 5 + .globl __ipipe_freerunning_countdown_16 +__ipipe_freerunning_countdown_16: + ldr r0, .LCfrcd16_cntr_addr +/* User-space entry-point: r0 is the hardware counter virtual address */ +1: myldrd r2, r3, r1, .LCfrcd16_last_tsc + ldrh ip, [r0] +#ifndef CONFIG_CPU_BIG_ENDIAN +/* Little endian */ + ldr r1, .LCfrcd16_last_tsc + rsb ip, ip, #0x10000 + cmp r1, r2 + mov r1, r2, lsr #16 + bne 1b + orr r0, ip, r1, lsl #16 + cmp r2, r0 + addhis r0, r0, #0x10000 + adc r1, r3, #0 +#else /* Big endian */ + ldr r1, .LCfrcd16_last_tsc + 4 + rsb ip, ip, #0x10000 + cmp r1, r3 + mov r1, r3, lsr #16 + bne 1b + orr r1, ip, r1, lsl #16 + cmp r3, r1 + addhis r1, r1, #0x10000 + adc r0, r2, #0 +#endif /* Big endian */ + usr_ret lr + + .align 5 +.LCfrt16_last_tsc: + .rep 7 + .word 0 + .endr +.LCfrt16_cntr_addr: + .word 0 + + .align 5 + .globl __ipipe_freerunning_twice_16 +__ipipe_freerunning_twice_16: + ldr r0, .LCfrt16_cntr_addr +/* User-space entry-point: r0 is the hardware counter virtual address */ +1: myldrd r2, r3, r1, 
.LCfrt16_last_tsc +2: ldrh ip, [r0] + ldrh r1, [r0] + cmp r1, ip + bne 2b +#ifndef CONFIG_CPU_BIG_ENDIAN +/* Little endian */ + ldr r1, .LCfrt16_last_tsc + cmp r1, r2 + mov r1, r2, lsr #16 + bne 1b + orr r0, ip, r1, lsl #16 + cmp r2, r0 + addhis r0, r0, #0x10000 + adc r1, r3, #0 +#else /* Big endian */ + ldr r1, .LCfrt16_last_tsc + 4 + cmp r1, r3 + mov r1, r3, lsr #16 + bne 1b + orr r1, ip, r1, lsl #16 + cmp r3, r1 + addhis r1, r1, #0x10000 + adc r0, r2, #0 +#endif /* Big endian */ + usr_ret lr + + .align 5 +.LCdec16_last_tsc: + .rep 2 + .word 0 + .endr +.LCdec16_last_cnt: + .rep 5 + .word 0 + .endr +.LCdec16_cntr_addr: + .word 0 + + .align 5 + .globl __ipipe_decrementer_16 +__ipipe_decrementer_16: + ldr r0, .LCdec16_cntr_addr +/* User-space entry-point: r0 is the hardware counter virtual address */ +#ifndef CONFIG_CPU_BIG_ENDIAN +/* Little endian */ +1: ldr r1, .LCdec16_last_tsc + ldrh ip, [r0] + ldr r2, .LCdec16_last_cnt + subs ip, r2, ip + addcc ip, ip, #0x10000 + myldrd r2, r3, r3, .LCdec16_last_tsc + cmp r1, r2 + bne 1b + adds r0, ip, r2 + adc r1, r3, #0 +#else /* Big endian */ +1: ldr r1, .LCdec16_last_tsc + 4 + ldrh ip, [r0] + ldr r2, .LCdec16_last_cnt + subs ip, r2, ip + addcc ip, ip, #0x10000 + myldrd r2, r3, r3, .LCdec16_last_tsc + cmp r1, r3 + bne 1b + adds r1, ip, r3 + adc r0, r2, #0 +#endif /* Big endian */ + usr_ret lr + + .align 5 + .globl __ipipe_freerunning_arch +__ipipe_freerunning_arch: + nop +#ifdef CONFIG_ARM_ARCH_TIMER + mrrc p15, 0, r0, r1, c14 +#else + mov r0, #0 + mov r1, #0 +#endif + usr_ret lr diff -ruN linux-org/arch/arm/kernel/ipipe_tsc.c linux/arch/arm/kernel/ipipe_tsc.c --- linux-org/arch/arm/kernel/ipipe_tsc.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/kernel/ipipe_tsc.c 2022-03-25 10:15:23.341003056 +0100 @@ -0,0 +1,276 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef unsigned long long __ipipe_tsc_t(void); + +extern __ipipe_tsc_t __ipipe_freerunning_64, + __ipipe_freerunning_32, + __ipipe_freerunning_countdown_32, + __ipipe_freerunning_16, + __ipipe_freerunning_countdown_16, + __ipipe_decrementer_16, + __ipipe_freerunning_twice_16, + __ipipe_freerunning_arch; +extern unsigned long __ipipe_tsc_addr; + +static struct __ipipe_tscinfo tsc_info; + +static struct clocksource clksrc = { + .name = "ipipe_tsc", + .rating = 0x7fffffff, + .read = (typeof(clksrc.read))__ipipe_tsc_get, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +struct ipipe_tsc_value_t { + unsigned long long last_tsc; + unsigned last_cnt; +}; + +unsigned long __ipipe_kuser_tsc_freq; + +struct ipipe_tsc_value_t *ipipe_tsc_value; +static struct timer_list ipipe_tsc_update_timer; + +static void __ipipe_tsc_update_fn(unsigned long cookie) +{ + __ipipe_tsc_update(); + ipipe_tsc_update_timer.expires += cookie; + add_timer(&ipipe_tsc_update_timer); +} + +void __init __ipipe_tsc_register(struct __ipipe_tscinfo *info) +{ + struct ipipe_tsc_value_t *vector_tsc_value; + unsigned long long wrap_ms; + unsigned long *tsc_addr; + __ipipe_tsc_t *implem; + unsigned long flags; + int registered; + char *tsc_area; + +#if !defined(CONFIG_CPU_USE_DOMAINS) + extern char __ipipe_tsc_area_start[], __kuser_helper_end[]; + + tsc_area = (char *)vectors_page + 0x1000 + + (__ipipe_tsc_area_start - __kuser_helper_end); + tsc_addr = (unsigned long *) + (tsc_area + ((char *)&__ipipe_tsc_addr - __ipipe_tsc_area)); +#else + tsc_area = __ipipe_tsc_area; + tsc_addr = &__ipipe_tsc_addr; +#endif + registered = ipipe_tsc_value 
!= NULL; + + if (WARN_ON(info->freq == 0)) + return; + + if (registered && info->freq < tsc_info.freq) + return; + + ipipe_tsc_value = (struct ipipe_tsc_value_t *)tsc_area; + vector_tsc_value = (struct ipipe_tsc_value_t *)__ipipe_tsc_area; + + switch(info->type) { + case IPIPE_TSC_TYPE_FREERUNNING: + switch(info->u.mask) { + case 0xffff: + implem = &__ipipe_freerunning_16; + break; + case 0xffffffff: + implem = &__ipipe_freerunning_32; + break; + case 0xffffffffffffffffULL: + implem = &__ipipe_freerunning_64; + break; + default: + goto unimplemented; + } + break; + + case IPIPE_TSC_TYPE_DECREMENTER: + if (info->u.mask != 0xffff) + goto unimplemented; + implem = &__ipipe_decrementer_16; + break; + + case IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN: + switch(info->u.mask) { + case 0xffff: + implem = &__ipipe_freerunning_countdown_16; + break; + case 0xffffffff: + implem = &__ipipe_freerunning_countdown_32; + break; + default: + goto unimplemented; + } + break; + + case IPIPE_TSC_TYPE_FREERUNNING_TWICE: + if (info->u.mask != 0xffff) + goto unimplemented; + implem = &__ipipe_freerunning_twice_16; + break; + + case IPIPE_TSC_TYPE_FREERUNNING_ARCH: + implem = &__ipipe_freerunning_arch; + break; + + default: + unimplemented: + printk("I-pipe: Unimplemented tsc configuration, " + "type: %d, mask: 0x%08Lx\n", info->type, info->u.mask); + BUG(); + } + + tsc_info = *info; + *tsc_addr = tsc_info.counter_vaddr; + if (tsc_info.type == IPIPE_TSC_TYPE_DECREMENTER) { + tsc_info.u.dec.last_cnt = &vector_tsc_value->last_cnt; + tsc_info.u.dec.tsc = &vector_tsc_value->last_tsc; + } else + tsc_info.u.fr.tsc = &vector_tsc_value->last_tsc; + + flags = hard_local_irq_save(); + ipipe_tsc_value->last_tsc = 0; + memcpy(tsc_area + 0x20, implem, 0x60); + flush_icache_range((unsigned long)(tsc_area), + (unsigned long)(tsc_area + 0x80)); + hard_local_irq_restore(flags); + + __ipipe_kuser_tsc_freq = tsc_info.freq; + + wrap_ms = info->u.mask; + do_div(wrap_ms, tsc_info.freq / 1000); + + printk(KERN_INFO "I-pipe, %u.%03u MHz clocksource, wrap in %Lu ms\n", + tsc_info.freq / 1000000, (tsc_info.freq % 1000000) / 1000, + wrap_ms); + + if (!registered) { + init_timer(&ipipe_tsc_update_timer); + clocksource_register_hz(&clksrc, tsc_info.freq); + } else + __clocksource_update_freq_hz(&clksrc, tsc_info.freq); + + wrap_ms *= HZ / 2; + do_div(wrap_ms, 1000); + if (wrap_ms > 0x7fffffff) + wrap_ms = 0x7fffffff; + ipipe_tsc_update_timer.data = wrap_ms; + ipipe_tsc_update_timer.function = __ipipe_tsc_update_fn; + mod_timer(&ipipe_tsc_update_timer, + jiffies + ipipe_tsc_update_timer.data); + + __ipipe_tracer_hrclock_initialized(); +} + +void __ipipe_mach_get_tscinfo(struct __ipipe_tscinfo *info) +{ + *info = tsc_info; +} + +void __ipipe_tsc_update(void) +{ + if (tsc_info.type == IPIPE_TSC_TYPE_DECREMENTER) { + unsigned cnt = *(unsigned *)tsc_info.counter_vaddr; + int offset = ipipe_tsc_value->last_cnt - cnt; + if (offset < 0) + offset += tsc_info.u.dec.mask + 1; + ipipe_tsc_value->last_tsc += offset; + ipipe_tsc_value->last_cnt = cnt; + return; + } + + /* Update last_tsc, in order to remain compatible with legacy + user-space 32 bits free-running counter implementation */ + ipipe_tsc_value->last_tsc = __ipipe_tsc_get() - 1; +} +EXPORT_SYMBOL(__ipipe_tsc_get); + +void __ipipe_update_vsyscall(struct timekeeper *tk) +{ + if (tk->tkr_mono.clock == &clksrc) + ipipe_update_hostrt(tk); +} + +#if !IS_ENABLED(CONFIG_VDSO) +void update_vsyscall(struct timekeeper *tk) +{ + __ipipe_update_vsyscall(tk); +} + +void update_vsyscall_tz(void) +{ +} +#endif + 
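+
+/*
+ * A minimal registration sketch (illustrative only; timer_rate,
+ * timer_base, timer_phys_base and counter_off are placeholders for
+ * the platform's own values): a platform timer driver exposes its
+ * free-running counter to the code above by filling a struct
+ * __ipipe_tscinfo and calling __ipipe_tsc_register(), as the
+ * mach-davinci and mach-omap2 changes further down in this patch do:
+ *
+ *	static struct __ipipe_tscinfo tsc_info = {
+ *		.type = IPIPE_TSC_TYPE_FREERUNNING,
+ *		.u = { { .mask = 0xffffffff, }, },
+ *	};
+ *
+ *	tsc_info.freq = timer_rate;
+ *	tsc_info.counter_vaddr = (unsigned long)timer_base + counter_off;
+ *	tsc_info.u.counter_paddr = timer_phys_base + counter_off;
+ *	__ipipe_tsc_register(&tsc_info);
+ */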
+#ifdef CONFIG_CPU_FREQ + +static __init void update_timer_freq(void *data) +{ + unsigned int hrclock_freq = *(unsigned int *)data; + + __ipipe_timer_refresh_freq(hrclock_freq); +} + +static __init int cpufreq_transition_handler(struct notifier_block *nb, + unsigned long state, void *data) +{ + struct cpufreq_freqs *freqs = data; + unsigned int freq; + + if (state == CPUFREQ_POSTCHANGE && + ipipe_tsc_value && tsc_info.refresh_freq) { + freq = tsc_info.refresh_freq(); + if (freq) { + if (freqs->cpu == 0) { + int oldrate; + tsc_info.freq = freq; + __ipipe_tsc_register(&tsc_info); + __ipipe_report_clockfreq_update(freq); + /* force timekeeper to recalculate the clocksource */ + oldrate = clksrc.rating; + clocksource_change_rating(&clksrc, 0); + clocksource_change_rating(&clksrc, oldrate); + } + smp_call_function_single(freqs->cpu, update_timer_freq, + &freq, 1); + } + } + + return NOTIFY_OK; +} + +static struct notifier_block __initdata cpufreq_nb = { + .notifier_call = cpufreq_transition_handler, +}; + +static __init int register_cpufreq_notifier(void) +{ + cpufreq_register_notifier(&cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); + return 0; +} +core_initcall(register_cpufreq_notifier); + +static __init int unregister_cpufreq_notifier(void) +{ + cpufreq_unregister_notifier(&cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); + return 0; +} +late_initcall(unregister_cpufreq_notifier); + +#endif /* CONFIG_CPUFREQ */ diff -ruN linux-org/arch/arm/kernel/Makefile linux/arch/arm/kernel/Makefile --- linux-org/arch/arm/kernel/Makefile 2022-03-25 09:55:36.309474516 +0100 +++ linux/arch/arm/kernel/Makefile 2022-03-25 10:15:23.341003056 +0100 @@ -87,6 +87,9 @@ head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_RAW_PRINTK) += raw_printk.o +obj-$(CONFIG_IPIPE) += ipipe.o +obj-$(CONFIG_IPIPE_ARM_KUSER_TSC) += ipipe_tsc.o ipipe_tsc_asm.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a diff -ruN linux-org/arch/arm/kernel/perf_callchain.c linux/arch/arm/kernel/perf_callchain.c --- linux-org/arch/arm/kernel/perf_callchain.c 2022-03-25 09:55:36.317474486 +0100 +++ linux/arch/arm/kernel/perf_callchain.c 2022-03-25 10:15:23.341003056 +0100 @@ -105,6 +105,9 @@ return; } + if (IS_ENABLED(CONFIG_IPIPE)) + return; + arm_get_current_stackframe(regs, &fr); walk_stackframe(&fr, callchain_trace, entry); } diff -ruN linux-org/arch/arm/kernel/process.c linux/arch/arm/kernel/process.c --- linux-org/arch/arm/kernel/process.c 2022-03-25 09:55:36.321474470 +0100 +++ linux/arch/arm/kernel/process.c 2022-03-25 10:15:23.341003056 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -57,22 +58,51 @@ "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; -/* - * This is our default idle handler. - */ - void (*arm_pm_idle)(void); -/* - * Called from the core idle loop. - */ +#ifdef CONFIG_IPIPE +static void __ipipe_halt_root(void) +{ + struct ipipe_percpu_domain_data *p; -void arch_cpu_idle(void) + /* + * Emulate idle entry sequence over the root domain, which is + * stalled on entry. 
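+ *
+ * Hard IRQs are disabled first and the root stall bit is cleared;
+ * if interrupts are already logged for the root domain they are
+ * synced immediately instead of sleeping, otherwise the platform
+ * idle routine is entered.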
+ */ + hard_local_irq_disable(); + + p = ipipe_this_cpu_root_context(); + __clear_bit(IPIPE_STALL_FLAG, &p->status); + + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_stage(); + else { + if (arm_pm_idle) + arm_pm_idle(); + else + cpu_do_idle(); + } +} +#else /* !CONFIG_IPIPE */ +static void __ipipe_halt_root(void) { if (arm_pm_idle) arm_pm_idle(); else cpu_do_idle(); +} +#endif /* !CONFIG_IPIPE */ + +/* + * Called from the core idle loop. + */ + +void arch_cpu_idle(void) +{ + if (!need_resched()) + __ipipe_halt_root(); + + /* This will re-enable hard_irqs also with IPIPE */ local_irq_enable(); } diff -ruN linux-org/arch/arm/kernel/ptrace.c linux/arch/arm/kernel/ptrace.c --- linux-org/arch/arm/kernel/ptrace.c 2022-03-25 09:55:36.321474470 +0100 +++ linux/arch/arm/kernel/ptrace.c 2022-03-25 10:15:23.341003056 +0100 @@ -215,6 +215,10 @@ static int break_trap(struct pt_regs *regs, unsigned int instr) { + + if (__ipipe_report_trap(IPIPE_TRAP_BREAK,regs)) + return 0; + ptrace_break(current, regs); return 0; } diff -ruN linux-org/arch/arm/kernel/raw_printk.c linux/arch/arm/kernel/raw_printk.c --- linux-org/arch/arm/kernel/raw_printk.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/kernel/raw_printk.c 2022-03-25 10:15:23.341003056 +0100 @@ -0,0 +1,33 @@ +#include +#include +#include + +void __weak printascii(const char *s) +{ + /* + * Allow building if CONFIG_DEBUG_LL is off but keep silent on + * raw_printk(). + */ +} + +static void raw_console_write(struct console *co, + const char *s, unsigned count) +{ + printascii(s); +} + +static struct console raw_console = { + .name = "rawcon", + .write_raw = raw_console_write, + .flags = CON_PRINTBUFFER | CON_RAW | CON_ENABLED, + .index = -1, +}; + +static int __init raw_console_init(void) +{ + register_console(&raw_console); + + return 0; +} + +console_initcall(raw_console_init); diff -ruN linux-org/arch/arm/kernel/setup.c linux/arch/arm/kernel/setup.c --- linux-org/arch/arm/kernel/setup.c 2022-03-25 09:55:36.321474470 +0100 +++ linux/arch/arm/kernel/setup.c 2022-03-25 10:15:23.341003056 +0100 @@ -32,7 +32,7 @@ #include #include #include - +#include #include #include #include @@ -579,7 +579,17 @@ #endif } -u32 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID }; +u32 __cpu_logical_map[16] = { [0 ... 
15] = MPIDR_INVALID }; + +#ifdef CONFIG_IPIPE + +void __init smp_build_cpu_revmap(void) +{ + /* printk on I-pipe needs per cpu data */ + set_my_cpu_offset(per_cpu_offset(0)); +} + +#endif void __init smp_setup_processor_id(void) { diff -ruN linux-org/arch/arm/kernel/signal.c linux/arch/arm/kernel/signal.c --- linux-org/arch/arm/kernel/signal.c 2022-03-25 09:55:36.321474470 +0100 +++ linux/arch/arm/kernel/signal.c 2022-03-25 10:15:23.341003056 +0100 @@ -616,11 +616,13 @@ trace_hardirqs_off(); do { if (likely(thread_flags & _TIF_NEED_RESCHED)) { + local_irq_disable(); + hard_cond_local_irq_enable(); schedule(); } else { if (unlikely(!user_mode(regs))) return 0; - local_irq_enable(); + hard_local_irq_enable(); if (thread_flags & _TIF_SIGPENDING) { int restart = do_signal(regs, syscall); if (unlikely(restart)) { @@ -639,7 +641,7 @@ tracehook_notify_resume(regs); } } - local_irq_disable(); + hard_local_irq_disable(); thread_flags = current_thread_info()->flags; } while (thread_flags & _TIF_WORK_MASK); return 0; diff -ruN linux-org/arch/arm/kernel/smp.c linux/arch/arm/kernel/smp.c --- linux-org/arch/arm/kernel/smp.c 2022-03-25 09:55:36.321474470 +0100 +++ linux/arch/arm/kernel/smp.c 2022-03-25 10:15:23.341003056 +0100 @@ -80,8 +80,23 @@ * not be usable by the kernel. Please keep the above limited * to at most 8 entries. */ +#ifdef CONFIG_IPIPE + IPI_IPIPE_FIRST, +#endif }; +#ifdef CONFIG_IPIPE +#define noipipe_irq_enter() \ + do { \ + } while(0) +#define noipipe_irq_exit() \ + do { \ + } while(0) +#else /* !CONFIG_IPIPE */ +#define noipipe_irq_enter() irq_enter() +#define noipipe_irq_exit() irq_exit() +#endif /* !CONFIG_IPIPE */ + static DECLARE_COMPLETION(cpu_running); static struct smp_operations smp_ops __ro_after_init; @@ -367,6 +382,13 @@ local_flush_bp_all(); enter_lazy_tlb(mm, current); local_flush_tlb_all(); +#ifdef CONFIG_IPIPE + /* + * With CONFIG_IPIPE debug_smp_processor_id requires access + * to percpu data. + */ + set_my_cpu_offset(per_cpu_offset(ipipe_processor_id())); +#endif /* * All kernel threads share the same mm context; grab a @@ -542,6 +564,81 @@ #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + +static inline void ipi_timer(void) +{ + tick_receive_broadcast(); +} + +#endif + +#ifdef CONFIG_IPIPE +#define IPIPE_IPI_BASE IPIPE_VIRQ_BASE + +unsigned __ipipe_first_ipi; +EXPORT_SYMBOL_GPL(__ipipe_first_ipi); + +static void __ipipe_do_IPI(unsigned virq, void *cookie) +{ + enum ipi_msg_type msg = virq - IPIPE_IPI_BASE; + handle_IPI(msg, raw_cpu_ptr(&ipipe_percpu.tick_regs)); +} + +void __ipipe_ipis_alloc(void) +{ + unsigned int virq, ipi, last_ipi; + + /* May be called multiple times via init_stage() */ + if (__ipipe_first_ipi) + return; + + last_ipi = NR_IPI + IPIPE_LAST_IPI; + for (ipi = 0; ipi <= last_ipi; ipi++) { + virq = ipipe_alloc_virq(); + if (ipi == IPI_IPIPE_FIRST) + __ipipe_first_ipi = virq; + } +} + +void __ipipe_ipis_request(void) +{ + unsigned virq; + + for (virq = IPIPE_IPI_BASE; virq < __ipipe_first_ipi; virq++) + ipipe_request_irq(ipipe_root_domain, + virq, + (ipipe_irq_handler_t)__ipipe_do_IPI, + NULL, NULL); +} +void ipipe_send_ipi(unsigned ipi, cpumask_t cpumask) +{ + enum ipi_msg_type msg = ipi - IPIPE_IPI_BASE; + smp_cross_call(&cpumask, msg); +} +EXPORT_SYMBOL_GPL(ipipe_send_ipi); + + /* hw IRQs off */ +asmlinkage void __exception __ipipe_grab_ipi(unsigned svc, struct pt_regs *regs) +{ + int virq = IPIPE_IPI_BASE + svc; + + /* + * Virtual NMIs ignore the root domain's stall + * bit. 
When caught over high priority + domains, virtual NMIs are pipelined the + usual way as normal interrupts. + */ + if (virq == IPIPE_SERVICE_VNMI && __ipipe_root_p) + __ipipe_do_vnmi(IPIPE_SERVICE_VNMI, NULL); + else + __ipipe_dispatch_irq(virq, IPIPE_IRQF_NOACK); + + __ipipe_exit_irq(regs); +} + +#endif /* CONFIG_IPIPE */ + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void tick_broadcast(const struct cpumask *mask) { smp_cross_call(mask, IPI_TIMER); @@ -608,9 +705,9 @@ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST case IPI_TIMER: - irq_enter(); - tick_receive_broadcast(); - irq_exit(); + noipipe_irq_enter(); + ipi_timer(); + noipipe_irq_exit(); break; #endif @@ -619,36 +716,36 @@ break; case IPI_CALL_FUNC: - irq_enter(); + noipipe_irq_enter(); generic_smp_call_function_interrupt(); - irq_exit(); + noipipe_irq_exit(); break; case IPI_CPU_STOP: - irq_enter(); + noipipe_irq_enter(); ipi_cpu_stop(cpu); - irq_exit(); + noipipe_irq_exit(); break; #ifdef CONFIG_IRQ_WORK case IPI_IRQ_WORK: - irq_enter(); + noipipe_irq_enter(); irq_work_run(); - irq_exit(); + noipipe_irq_exit(); break; #endif case IPI_COMPLETION: - irq_enter(); + noipipe_irq_enter(); ipi_complete(cpu); - irq_exit(); + noipipe_irq_exit(); break; case IPI_CPU_BACKTRACE: printk_nmi_enter(); - irq_enter(); + noipipe_irq_enter(); nmi_cpu_backtrace(regs); - irq_exit(); + noipipe_irq_exit(); printk_nmi_exit(); break; diff -ruN linux-org/arch/arm/kernel/smp_twd.c linux/arch/arm/kernel/smp_twd.c --- linux-org/arch/arm/kernel/smp_twd.c 2022-03-25 09:55:36.321474470 +0100 +++ linux/arch/arm/kernel/smp_twd.c 2022-03-25 10:15:23.341003056 +0100 @@ -20,10 +20,14 @@ #include #include #include +#include #include #include +#include +#include #include +#include /* set up by the platform code */ static void __iomem *twd_base; @@ -37,6 +41,36 @@ CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; static int twd_ppi; +#ifdef CONFIG_IPIPE +static DEFINE_PER_CPU(struct ipipe_timer, twd_itimer); + +static void twd_ack(void) +{ + writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT); +} + +static void twd_get_clock(struct device_node *np); +static void twd_calibrate_rate(void); + +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL + +static DEFINE_PER_CPU(int, irqs); + +void twd_hrtimer_debug(unsigned int irq) /* hw interrupt off */ +{ + int cpu = ipipe_processor_id(); + + if ((++per_cpu(irqs, cpu) % HZ) == 0) { +#if 0 + raw_printk("%c", 'A' + cpu); +#else + do { } while (0); +#endif + } +} +#endif /* CONFIG_IPIPE_DEBUG_INTERNAL */ +#endif /* CONFIG_IPIPE */ + static int twd_shutdown(struct clock_event_device *clk) { writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); @@ -191,6 +225,13 @@ #endif +#ifdef CONFIG_IPIPE +static unsigned int twd_refresh_freq(void) +{ + return clk_get_rate(twd_clk); +} +#endif + static void twd_calibrate_rate(void) { unsigned long count; @@ -234,7 +275,11 @@ { struct clock_event_device *evt = dev_id; + if (clockevent_ipipe_stolen(evt)) + goto handle; + if (twd_timer_ack()) { + handle: evt->event_handler(evt); return IRQ_HANDLED; } @@ -303,6 +348,18 @@ clk->tick_resume = twd_shutdown; clk->set_next_event = twd_set_next_event; clk->irq = twd_ppi; + +#ifdef CONFIG_IPIPE + printk(KERN_INFO "I-pipe, %lu.%03lu MHz timer\n", + twd_timer_rate / 1000000, + (twd_timer_rate % 1000000) / 1000); + clk->ipipe_timer = raw_cpu_ptr(&twd_itimer); + clk->ipipe_timer->irq = clk->irq; + clk->ipipe_timer->ack = twd_ack; + clk->ipipe_timer->min_delay_ticks = 0xf; + clk->ipipe_timer->refresh_freq = twd_refresh_freq; +#endif + clk->cpumask = cpumask_of(cpu);
clockevents_config_and_register(clk, twd_timer_rate, @@ -356,6 +413,10 @@ else late_time_init = twd_timer_setup; +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL + __ipipe_mach_hrtimer_debug = &twd_hrtimer_debug; +#endif /* CONFIG_IPIPE_DEBUG_INTERNAL */ + return 0; out_free: diff -ruN linux-org/arch/arm/kernel/traps.c linux/arch/arm/kernel/traps.c --- linux-org/arch/arm/kernel/traps.c 2022-03-25 09:55:36.325474456 +0100 +++ linux/arch/arm/kernel/traps.c 2022-03-25 10:15:23.341003056 +0100 @@ -524,6 +524,9 @@ */ asmlinkage void bad_mode(struct pt_regs *regs, int reason) { + if (__ipipe_report_trap(IPIPE_TRAP_UNKNOWN,regs)) + return; + console_verbose(); pr_crit("Bad mode in %s handler detected\n", handler[reason]); @@ -799,10 +802,21 @@ #ifdef CONFIG_KUSER_HELPERS static void __init kuser_init(void *vectors) { +#ifndef CONFIG_IPIPE extern char __kuser_helper_start[], __kuser_helper_end[]; int kuser_sz = __kuser_helper_end - __kuser_helper_start; +#else /* !CONFIG_IPIPE */ + extern char __ipipe_tsc_area_start[], __kuser_helper_end[]; + int kuser_sz = __kuser_helper_end - __ipipe_tsc_area_start; + extern char __vectors_start[], __vectors_end[]; +#endif /* !CONFIG_IPIPE */ +#ifndef CONFIG_IPIPE memcpy(vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz); +#else /* !CONFIG_IPIPE */ + BUG_ON(0x1000 - kuser_sz < __vectors_end - __vectors_start); + memcpy(vectors + 0x1000 - kuser_sz, __ipipe_tsc_area_start, kuser_sz); +#endif /* !CONFIG_IPIPE */ /* * vectors + 0xfe0 = __kuser_get_tls diff -ruN linux-org/arch/arm/kernel/vdso.c linux/arch/arm/kernel/vdso.c --- linux-org/arch/arm/kernel/vdso.c 2022-03-25 09:55:36.325474456 +0100 +++ linux/arch/arm/kernel/vdso.c 2022-03-25 10:15:23.341003056 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -317,6 +318,8 @@ { struct timespec64 *wtm = &tk->wall_to_monotonic; + __ipipe_update_vsyscall(tk); + if (!cntvct_ok) { /* The entry points have been zeroed, so there is no * point in updating the data page. diff -ruN linux-org/arch/arm/mach-davinci/Kconfig linux/arch/arm/mach-davinci/Kconfig --- linux-org/arch/arm/mach-davinci/Kconfig 2022-03-25 09:55:36.341474395 +0100 +++ linux/arch/arm/mach-davinci/Kconfig 2022-03-25 10:15:23.341003056 +0100 @@ -43,6 +43,7 @@ depends on !ARCH_DAVINCI_DMx || (AUTO_ZRELADDR && ARM_PATCH_PHYS_VIRT) select ARCH_DAVINCI_DA8XX select CP_INTC + select IPIPE_ARM_KUSER_TSC if IPIPE config ARCH_DAVINCI_DA8XX bool diff -ruN linux-org/arch/arm/mach-davinci/time.c linux/arch/arm/mach-davinci/time.c --- linux-org/arch/arm/mach-davinci/time.c 2022-03-25 09:55:36.345474381 +0100 +++ linux/arch/arm/mach-davinci/time.c 2022-03-25 10:15:23.341003056 +0100 @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -94,9 +96,15 @@ unsigned long opts; unsigned long flags; void __iomem *base; +#ifdef CONFIG_IPIPE + void *pbase; +#endif /*CONFIG_IPIPE */ unsigned long tim_off; unsigned long prd_off; unsigned long enamode_shift; +#ifdef CONFIG_IPIPE + int irq; +#endif /* CONFIG_IPIPE */ struct irqaction irqaction; }; static struct timer_s timers[]; @@ -241,6 +249,9 @@ t->base = base[timer]; if (!t->base) continue; +#ifdef CONFIG_IPIPE + t->pbase = (void *)dtip[timer].base; +#endif /* CONFIG_IPIPE */ if (IS_TIMER_BOT(t->id)) { t->enamode_shift = 6; @@ -262,6 +273,9 @@ irq = USING_COMPARE(t) ? 
dtip[i].cmp_irq : irq; setup_irq(irq, &t->irqaction); } +#ifdef CONFIG_IPIPE + t->irq = irq; +#endif /* CONFIG_IPIPE */ } } @@ -332,6 +346,19 @@ return 0; } +#ifdef CONFIG_IPIPE +static struct ipipe_timer davinci_itimer; + +static struct __ipipe_tscinfo tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING, + .u = { + { + .mask = 0xffffffff, + }, + }, +}; +#endif /* CONFIG_IPIPE */ + static struct clock_event_device clockevent_davinci = { .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, @@ -339,6 +366,9 @@ .set_state_shutdown = davinci_shutdown, .set_state_periodic = davinci_set_periodic, .set_state_oneshot = davinci_set_oneshot, +#ifdef CONFIG_IPIPE + .ipipe_timer = &davinci_itimer, +#endif /* CONFIG_IPIPE */ }; @@ -403,6 +433,17 @@ clockevent_davinci.name = id_to_name[timers[TID_CLOCKEVENT].id]; clockevent_davinci.cpumask = cpumask_of(0); +#ifdef CONFIG_IPIPE + tsc_info.freq = davinci_clock_tick_rate; + tsc_info.counter_vaddr = (void *)(timers[TID_CLOCKSOURCE].base + + timers[TID_CLOCKSOURCE].tim_off); + tsc_info.u.counter_paddr = timers[TID_CLOCKSOURCE].pbase + + timers[TID_CLOCKSOURCE].tim_off; + __ipipe_tsc_register(&tsc_info); + + davinci_itimer.irq = timers[TID_CLOCKEVENT].irq; + davinci_itimer.min_delay_ticks = 3; +#endif /* CONFIG_IPIPE */ clockevents_config_and_register(&clockevent_davinci, davinci_clock_tick_rate, 1, 0xfffffffe); diff -ruN linux-org/arch/arm/mach-imx/avic.c linux/arch/arm/mach-imx/avic.c --- linux-org/arch/arm/mach-imx/avic.c 2022-03-25 09:55:36.353474351 +0100 +++ linux/arch/arm/mach-imx/avic.c 2022-03-25 10:15:23.341003056 +0100 @@ -123,6 +123,10 @@ ct->chip.irq_mask = irq_gc_mask_clr_bit; ct->chip.irq_unmask = irq_gc_mask_set_bit; ct->chip.irq_ack = irq_gc_mask_clr_bit; +#ifdef CONFIG_IPIPE + ct->chip.irq_mask_ack = irq_gc_mask_clr_bit; + ct->chip.flags = IRQCHIP_PIPELINE_SAFE; +#endif /* CONFIG_IPIPE */ ct->chip.irq_set_wake = irq_gc_set_wake; ct->chip.irq_suspend = avic_irq_suspend; ct->chip.irq_resume = avic_irq_resume; @@ -141,7 +145,7 @@ if (nivector == 0xffff) break; - handle_domain_irq(domain, nivector, regs); + ipipe_handle_domain_irq(domain, nivector, regs); } while (1); } diff -ruN linux-org/arch/arm/mach-imx/gpc.c linux/arch/arm/mach-imx/gpc.c --- linux-org/arch/arm/mach-imx/gpc.c 2022-03-25 09:55:36.357474335 +0100 +++ linux/arch/arm/mach-imx/gpc.c 2022-03-25 10:15:23.341003056 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ static void __iomem *gpc_base; static u32 gpc_wake_irqs[IMR_NUM]; static u32 gpc_saved_imrs[IMR_NUM]; +static IPIPE_DEFINE_RAW_SPINLOCK(gpc_lock); void imx_gpc_set_arm_power_up_timing(u32 sw2iso, u32 sw) { @@ -54,28 +56,38 @@ void imx_gpc_pre_suspend(bool arm_power_off) { void __iomem *reg_imr1 = gpc_base + GPC_IMR1; + unsigned long flags; int i; /* Tell GPC to power off ARM core when suspend */ if (arm_power_off) imx_gpc_set_arm_power_in_lpm(arm_power_off); + flags = hard_cond_local_irq_save(); + for (i = 0; i < IMR_NUM; i++) { gpc_saved_imrs[i] = readl_relaxed(reg_imr1 + i * 4); writel_relaxed(~gpc_wake_irqs[i], reg_imr1 + i * 4); } + + hard_cond_local_irq_restore(flags); } void imx_gpc_post_resume(void) { void __iomem *reg_imr1 = gpc_base + GPC_IMR1; + unsigned long flags; int i; /* Keep ARM core powered on for other low-power modes */ imx_gpc_set_arm_power_in_lpm(false); + flags = hard_cond_local_irq_save(); + for (i = 0; i < IMR_NUM; i++) writel_relaxed(gpc_saved_imrs[i], reg_imr1 + i * 4); + + hard_cond_local_irq_restore(flags); } static int 
imx_gpc_irq_set_wake(struct irq_data *d, unsigned int on) @@ -97,22 +109,31 @@ void imx_gpc_mask_all(void) { void __iomem *reg_imr1 = gpc_base + GPC_IMR1; + unsigned long flags; int i; + flags = hard_cond_local_irq_save(); + for (i = 0; i < IMR_NUM; i++) { gpc_saved_imrs[i] = readl_relaxed(reg_imr1 + i * 4); writel_relaxed(~0, reg_imr1 + i * 4); } + hard_cond_local_irq_restore(flags); } void imx_gpc_restore_all(void) { void __iomem *reg_imr1 = gpc_base + GPC_IMR1; + unsigned long flags; int i; + flags = hard_cond_local_irq_save(); + for (i = 0; i < IMR_NUM; i++) writel_relaxed(gpc_saved_imrs[i], reg_imr1 + i * 4); + + hard_cond_local_irq_restore(flags); } void imx_gpc_hwirq_unmask(unsigned int hwirq) @@ -139,16 +160,49 @@ static void imx_gpc_irq_unmask(struct irq_data *d) { + unsigned long flags; + + raw_spin_lock_irqsave(&gpc_lock, flags); imx_gpc_hwirq_unmask(d->hwirq); + __ipipe_spin_unlock_irqbegin(&gpc_lock); irq_chip_unmask_parent(d); + __ipipe_spin_unlock_irqcomplete(flags); } static void imx_gpc_irq_mask(struct irq_data *d) { + unsigned long flags; + + raw_spin_lock_irqsave(&gpc_lock, flags); + /* Parent IC will handle virtual locking */ imx_gpc_hwirq_mask(d->hwirq); + __ipipe_spin_unlock_irqbegin(&gpc_lock); irq_chip_mask_parent(d); + __ipipe_spin_unlock_irqcomplete(flags); } +#ifdef CONFIG_IPIPE + +static void imx_gpc_hold_irq(struct irq_data *d) +{ + raw_spin_lock(&gpc_lock); + imx_gpc_hwirq_mask(d->hwirq); + raw_spin_unlock(&gpc_lock); + irq_chip_hold_parent(d); +} + +static void imx_gpc_release_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&gpc_lock, flags); + imx_gpc_hwirq_unmask(d->hwirq); + raw_spin_unlock_irqrestore(&gpc_lock, flags); + irq_chip_release_parent(d); +} + +#endif /* CONFIG_IPIPE */ + static struct irq_chip imx_gpc_chip = { .name = "GPC", .irq_eoi = irq_chip_eoi_parent, @@ -160,6 +214,11 @@ #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif +#ifdef CONFIG_IPIPE + .irq_hold = imx_gpc_hold_irq, + .irq_release = imx_gpc_release_irq, +#endif + .flags = IRQCHIP_PIPELINE_SAFE, }; static int imx_gpc_domain_translate(struct irq_domain *d, diff -ruN linux-org/arch/arm/mach-imx/mach-imx51.c linux/arch/arm/mach-imx/mach-imx51.c --- linux-org/arch/arm/mach-imx/mach-imx51.c 2022-03-25 09:55:36.369474290 +0100 +++ linux/arch/arm/mach-imx/mach-imx51.c 2022-03-25 10:15:23.341003056 +0100 @@ -59,6 +59,11 @@ static void __init imx51_init_late(void) { mx51_neon_fixup(); +#ifdef CONFIG_IPIPE + /* Allow user-space access to emulated tsc */ + imx_set_aips(IMX_IO_ADDRESS(0x73f00000)); + imx_set_aips(IMX_IO_ADDRESS(0x83f00000)); +#endif imx51_pm_init(); } diff -ruN linux-org/arch/arm/mach-imx/mach-imx53.c linux/arch/arm/mach-imx/mach-imx53.c --- linux-org/arch/arm/mach-imx/mach-imx53.c 2022-03-25 09:55:36.369474290 +0100 +++ linux/arch/arm/mach-imx/mach-imx53.c 2022-03-25 10:15:23.341003056 +0100 @@ -37,6 +37,11 @@ static void __init imx53_init_late(void) { +#ifdef CONFIG_IPIPE + /* Allow user-space access to emulated tsc */ + imx_set_aips(IMX_IO_ADDRESS(0x53f00000)); + imx_set_aips(IMX_IO_ADDRESS(0x63f00000)); +#endif imx53_pm_init(); } diff -ruN linux-org/arch/arm/mach-imx/tzic.c linux/arch/arm/mach-imx/tzic.c --- linux-org/arch/arm/mach-imx/tzic.c 2022-03-25 09:55:36.377474260 +0100 +++ linux/arch/arm/mach-imx/tzic.c 2022-03-25 10:15:23.341003056 +0100 @@ -116,6 +116,10 @@ ct = gc->chip_types; ct->chip.irq_mask = irq_gc_mask_disable_reg; ct->chip.irq_unmask = irq_gc_unmask_enable_reg; +#ifdef CONFIG_IPIPE + 
ct->chip.irq_mask_ack = irq_gc_mask_disable_reg; + ct->chip.flags = IRQCHIP_PIPELINE_SAFE; +#endif /* CONFIG_IPIPE */ ct->chip.irq_set_wake = irq_gc_set_wake; ct->chip.irq_suspend = tzic_irq_suspend; ct->chip.irq_resume = tzic_irq_resume; @@ -140,7 +144,7 @@ while (stat) { handled = 1; irqofs = fls(stat) - 1; - handle_domain_irq(domain, irqofs + i * 32, regs); + ipipe_handle_domain_irq(domain, irqofs + i * 32, regs); stat &= ~(1 << irqofs); } } @@ -166,8 +170,13 @@ i = imx_readl(tzic_base + TZIC_INTCNTL); imx_writel(0x80010001, tzic_base + TZIC_INTCNTL); +#ifndef CONFIG_IPIPE imx_writel(0x1f, tzic_base + TZIC_PRIOMASK); imx_writel(0x02, tzic_base + TZIC_SYNCCTRL); +#else + imx_writel(0xf0, tzic_base + TZIC_PRIOMASK); + imx_writel(0, tzic_base + TZIC_SYNCCTRL); +#endif for (i = 0; i < 4; i++) imx_writel(0xFFFFFFFF, tzic_base + TZIC_INTSEC0(i)); diff -ruN linux-org/arch/arm/mach-omap2/Kconfig linux/arch/arm/mach-omap2/Kconfig --- linux-org/arch/arm/mach-omap2/Kconfig 2022-03-25 09:55:36.433474049 +0100 +++ linux/arch/arm/mach-omap2/Kconfig 2022-03-25 10:15:23.345003040 +0100 @@ -38,6 +38,7 @@ select ARM_ERRATA_754322 select ARM_ERRATA_775420 select OMAP_INTERCONNECT + select ARM_GLOBAL_TIMER if IPIPE && SMP config SOC_OMAP5 bool "TI OMAP5" diff -ruN linux-org/arch/arm/mach-omap2/omap-wakeupgen.c linux/arch/arm/mach-omap2/omap-wakeupgen.c --- linux-org/arch/arm/mach-omap2/omap-wakeupgen.c 2022-03-25 09:55:36.441474019 +0100 +++ linux/arch/arm/mach-omap2/omap-wakeupgen.c 2022-03-25 10:15:23.345003040 +0100 @@ -52,7 +52,7 @@ static void __iomem *wakeupgen_base; static void __iomem *sar_base; -static DEFINE_RAW_SPINLOCK(wakeupgen_lock); +static IPIPE_DEFINE_RAW_SPINLOCK(wakeupgen_lock); static unsigned int irq_target_cpu[MAX_IRQS]; static unsigned int irq_banks = DEFAULT_NR_REG_BANKS; static unsigned int max_irqs = DEFAULT_IRQS; @@ -153,6 +153,30 @@ irq_chip_unmask_parent(d); } +#ifdef CONFIG_IPIPE + +static void wakeupgen_hold(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&wakeupgen_lock, flags); + _wakeupgen_clear(d->hwirq, irq_target_cpu[d->hwirq]); + raw_spin_unlock_irqrestore(&wakeupgen_lock, flags); + irq_chip_hold_parent(d); +} + +static void wakeupgen_release(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&wakeupgen_lock, flags); + _wakeupgen_set(d->hwirq, irq_target_cpu[d->hwirq]); + raw_spin_unlock_irqrestore(&wakeupgen_lock, flags); + irq_chip_release_parent(d); +} + +#endif + #ifdef CONFIG_HOTPLUG_CPU static DEFINE_PER_CPU(u32 [MAX_NR_REG_BANKS], irqmasks); @@ -447,9 +471,13 @@ .irq_eoi = irq_chip_eoi_parent, .irq_mask = wakeupgen_mask, .irq_unmask = wakeupgen_unmask, +#ifdef CONFIG_IPIPE + .irq_hold = wakeupgen_hold, + .irq_release = wakeupgen_release, +#endif .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_type = irq_chip_set_type_parent, - .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif diff -ruN linux-org/arch/arm/mach-omap2/prm_common.c linux/arch/arm/mach-omap2/prm_common.c --- linux-org/arch/arm/mach-omap2/prm_common.c 2022-03-25 09:55:36.453473973 +0100 +++ linux/arch/arm/mach-omap2/prm_common.c 2022-03-25 10:15:23.345003040 +0100 @@ -145,11 +145,11 @@ /* Serve priority events first */ for_each_set_bit(virtirq, priority_pending, nr_irq) - generic_handle_irq(prcm_irq_setup->base_irq + virtirq); + ipipe_handle_demuxed_irq(prcm_irq_setup->base_irq + 
virtirq); /* Serve normal events next */ for_each_set_bit(virtirq, pending, nr_irq) - generic_handle_irq(prcm_irq_setup->base_irq + virtirq); + ipipe_handle_demuxed_irq(prcm_irq_setup->base_irq + virtirq); } if (chip->irq_ack) chip->irq_ack(&desc->irq_data); diff -ruN linux-org/arch/arm/mach-omap2/timer.c linux/arch/arm/mach-omap2/timer.c --- linux-org/arch/arm/mach-omap2/timer.c 2022-03-25 09:55:36.457473959 +0100 +++ linux/arch/arm/mach-omap2/timer.c 2022-03-25 10:15:23.345003040 +0100 @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include #include #include @@ -80,11 +82,17 @@ } #endif +static void omap2_gp_timer_ack(void) +{ + __omap_dm_timer_write_status(&clkev, OMAP_TIMER_INT_OVERFLOW); +} + static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id) { struct clock_event_device *evt = &clockevent_gpt; - __omap_dm_timer_write_status(&clkev, OMAP_TIMER_INT_OVERFLOW); + if (!clockevent_ipipe_stolen(evt)) + omap2_gp_timer_ack(); evt->event_handler(evt); return IRQ_HANDLED; @@ -250,11 +258,12 @@ const char *fck_source, const char *property, const char **timer_name, - int posted) + int posted, bool ipipe) { const char *oh_name = NULL; struct device_node *np; struct omap_hwmod *oh; + struct resource mem; struct clk *src; int r = 0; @@ -271,6 +280,9 @@ return -ENXIO; timer->io_base = of_iomap(np, 0); + if (of_address_to_resource(np, 0, &mem)) + mem.start = 0; + timer->phys_base = mem.start; timer->fclk = of_clk_get_by_name(np, "fck"); @@ -304,6 +316,13 @@ omap_hwmod_enable(oh); __omap_dm_timer_init_regs(timer); +#ifdef CONFIG_IPIPE + if (ipipe) { + u32 l = __raw_readl(timer->io_base + OMAP_TIMER_OCP_CFG_OFFSET); + l = (0x3 << 8) | (l & (1 << 5)) | (0x1 << 3) | (1 << 2); + __raw_writel(l, timer->io_base + OMAP_TIMER_OCP_CFG_OFFSET); + } +#endif if (posted) __omap_dm_timer_enable_posted(timer); @@ -324,11 +343,56 @@ } #endif +#ifdef CONFIG_IPIPE + +static struct ipipe_timer omap_shared_itimer = { + .ack = omap2_gp_timer_ack, + .min_delay_ticks = 3, +}; + +#define IPIPE_GPTIMER 3 + +static struct omap_dm_timer itimer; + +static void omap3_itimer_request(struct ipipe_timer *timer, int steal) +{ + __omap_dm_timer_stop(&itimer, 0, itimer.rate); +} + +static int omap3_itimer_set(unsigned long cycles, void *timer) +{ + __omap_dm_timer_load_start(&itimer, OMAP_TIMER_CTRL_ST, + 0xffffffff - cycles, OMAP_TIMER_POSTED); + return 0; +} + +static void omap3_itimer_ack(void) +{ + __omap_dm_timer_write_status(&itimer, OMAP_TIMER_INT_OVERFLOW); + __omap_dm_timer_read_status(&itimer); +} + +static void omap3_itimer_release(struct ipipe_timer *timer) +{ + __omap_dm_timer_stop(&itimer, 0, itimer.rate); +} + +static struct ipipe_timer omap3_itimer = { + .request = omap3_itimer_request, + .set = omap3_itimer_set, + .ack = omap3_itimer_ack, + .release = omap3_itimer_release, + .rating = 100, + .min_delay_ticks = 3, +}; + +#endif /* CONFIG_IPIPE */ + static void __init omap2_gp_clockevent_init(int gptimer_id, const char *fck_source, const char *property) { - int res; + int res, ipipe = false; clkev.id = gptimer_id; clkev.errata = omap_dm_timer_get_errata(); @@ -340,8 +404,32 @@ */ __omap_dm_timer_override_errata(&clkev, OMAP_TIMER_ERRATA_I103_I767); +#ifdef CONFIG_IPIPE + if (cpu_is_omap34xx()) { + itimer.id = IPIPE_GPTIMER; + itimer.errata = omap_dm_timer_get_errata(); + __omap_dm_timer_override_errata(&itimer, + OMAP_TIMER_ERRATA_I103_I767); + res = omap_dm_timer_init_one(&itimer, + "timer_sys_ck", + NULL, + &omap3_itimer.name, + OMAP_TIMER_POSTED, true); + BUG_ON(res); + + 
__omap_dm_timer_int_enable(&itimer, OMAP_TIMER_INT_OVERFLOW); + omap3_itimer.irq = itimer.irq; + omap3_itimer.freq = itimer.rate; + omap3_itimer.cpumask = cpumask_of(0); + + ipipe_timer_register(&omap3_itimer); + } + if ((cpu_is_omap44xx() && num_possible_cpus() == 1) || soc_is_am33xx()) + ipipe = 1; +#endif /* CONFIG_IPIPE */ + res = omap_dm_timer_init_one(&clkev, fck_source, property, - &clockevent_gpt.name, OMAP_TIMER_POSTED); + &clockevent_gpt.name, OMAP_TIMER_POSTED, ipipe); BUG_ON(res); omap2_gp_timer_irq.dev_id = &clkev; @@ -351,6 +439,13 @@ clockevent_gpt.cpumask = cpu_possible_mask; clockevent_gpt.irq = omap_dm_timer_get_irq(&clkev); +#ifdef CONFIG_IPIPE + if (ipipe) { + omap_shared_itimer.irq = clkev.irq; + omap_shared_itimer.min_delay_ticks = 3; + clockevent_gpt.ipipe_timer = &omap_shared_itimer; + } +#endif clockevents_config_and_register(&clockevent_gpt, clkev.rate, 3, /* Timer internal resynch latency */ 0xffffffff); @@ -439,6 +534,18 @@ return ret; } +#ifdef CONFIG_IPIPE + +static struct __ipipe_tscinfo __maybe_unused tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING, + .u = { + { + .mask = 0xffffffff, + }, + }, +}; +#endif + static void __init omap2_gptimer_clocksource_init(int gptimer_id, const char *fck_source, const char *property) @@ -450,7 +557,7 @@ res = omap_dm_timer_init_one(&clksrc, fck_source, property, &clocksource_gpt.name, - OMAP_TIMER_NONPOSTED); + OMAP_TIMER_NONPOSTED, IS_ENABLED(CONFIG_IPIPE)); BUG_ON(res); __omap_dm_timer_load_start(&clksrc, @@ -458,6 +565,19 @@ OMAP_TIMER_NONPOSTED); sched_clock_register(dmtimer_read_sched_clock, 32, clksrc.rate); +#ifdef CONFIG_IPIPE + { + unsigned long off = OMAP_TIMER_COUNTER_REG & 0xff; + if (clksrc.revision == 2) + off += OMAP_TIMER_V2_FUNC_OFFSET; + + tsc_info.freq = clksrc.rate; + tsc_info.counter_vaddr = (unsigned long)clksrc.io_base + off; + tsc_info.u.counter_paddr = clksrc.phys_base + off; + __ipipe_tsc_register(&tsc_info); + } +#endif + if (clocksource_register_hz(&clocksource_gpt, clksrc.rate)) pr_err("Could not register clocksource %s\n", clocksource_gpt.name); @@ -470,9 +590,16 @@ const char *clkev_prop, int clksrc_nr, const char *clksrc_src, const char *clksrc_prop, bool gptimer) { + const char *clk = clkev_src; + + if (num_possible_cpus() == 1 && !soc_is_omap54xx()) { + use_gptimer_clksrc = true; + if (cpu_is_omap44xx()) + clk = "timer_sys_ck"; + } omap_clk_init(); omap_dmtimer_init(); - omap2_gp_clockevent_init(clkev_nr, clkev_src, clkev_prop); + omap2_gp_clockevent_init(clkev_nr, clk, clkev_prop); /* Enable the use of clocksource="gp_timer" kernel parameter */ if (use_gptimer_clksrc || gptimer) diff -ruN linux-org/arch/arm/mach-socfpga/Kconfig linux/arch/arm/mach-socfpga/Kconfig --- linux-org/arch/arm/mach-socfpga/Kconfig 2022-03-25 09:55:36.489473838 +0100 +++ linux/arch/arm/mach-socfpga/Kconfig 2022-03-25 10:15:23.345003040 +0100 @@ -10,6 +10,7 @@ select HAVE_ARM_SCU select HAVE_ARM_TWD if SMP select MFD_SYSCON + select IPIPE_ARM_KUSER_TSC if IPIPE if ARCH_SOCFPGA config SOCFPGA_SUSPEND diff -ruN linux-org/arch/arm/mach-sti/Kconfig linux/arch/arm/mach-sti/Kconfig --- linux-org/arch/arm/mach-sti/Kconfig 2022-03-25 09:55:36.493473823 +0100 +++ linux/arch/arm/mach-sti/Kconfig 2022-03-25 10:15:23.345003040 +0100 @@ -17,6 +17,7 @@ select PL310_ERRATA_753970 if CACHE_L2X0 select PL310_ERRATA_769419 if CACHE_L2X0 select RESET_CONTROLLER + select IPIPE_ARM_KUSER_TSC if IPIPE help Include support for STMicroelectronics' STiH415/416, STiH407/10 and STiH418 family SoCs using the Device Tree for discovery. 
More diff -ruN linux-org/arch/arm/mach-sunxi/Kconfig linux/arch/arm/mach-sunxi/Kconfig --- linux-org/arch/arm/mach-sunxi/Kconfig 2022-03-25 09:55:36.493473823 +0100 +++ linux/arch/arm/mach-sunxi/Kconfig 2022-03-25 10:15:23.345003040 +0100 @@ -42,6 +42,7 @@ default ARCH_SUNXI select ARM_GIC select MFD_SUN6I_PRCM + select IPIPE_ARM_KUSER_TSC if IPIPE config MACH_SUN9I bool "Allwinner (sun9i) SoCs support" diff -ruN linux-org/arch/arm/mm/alignment.c linux/arch/arm/mm/alignment.c --- linux-org/arch/arm/mm/alignment.c 2022-03-25 09:55:36.501473793 +0100 +++ linux/arch/arm/mm/alignment.c 2022-03-25 10:15:23.345003040 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -781,7 +782,10 @@ int thumb2_32b = 0; if (interrupts_enabled(regs)) - local_irq_enable(); + hard_local_irq_enable(); + + if (__ipipe_report_trap(IPIPE_TRAP_ALIGNMENT,regs)) + return 0; instrptr = instruction_pointer(regs); @@ -969,7 +973,7 @@ * entry-common.S) and disable the alignment trap only if * there is no work pending for this thread. */ - raw_local_irq_disable(); + hard_local_irq_disable(); if (!(current_thread_info()->flags & _TIF_WORK_MASK)) set_cr(cr_no_alignment); } diff -ruN linux-org/arch/arm/mm/cache-l2x0.c linux/arch/arm/mm/cache-l2x0.c --- linux-org/arch/arm/mm/cache-l2x0.c 2022-03-25 09:55:36.501473793 +0100 +++ linux/arch/arm/mm/cache-l2x0.c 2022-03-25 10:15:23.345003040 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -48,9 +49,23 @@ #define CACHE_LINE_SIZE 32 +#ifdef CONFIG_IPIPE +#define CACHE_RANGE_ATOMIC_MAX 512UL +static int l2x0_wa = -1; +static int __init l2x0_setup_wa(char *str) +{ + l2x0_wa = !!simple_strtol(str, NULL, 0); + return 0; +} +early_param("l2x0_write_allocate", l2x0_setup_wa); +#else +#define CACHE_RANGE_ATOMIC_MAX 4096UL +static int l2x0_wa = 1; +#endif + static void __iomem *l2x0_base; static const struct l2c_init_data *l2x0_data; -static DEFINE_RAW_SPINLOCK(l2x0_lock); +static IPIPE_DEFINE_RAW_SPINLOCK(l2x0_lock); static u32 l2x0_way_mask; /* Bitmask of active ways */ static u32 l2x0_size; static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; @@ -296,10 +311,10 @@ static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, unsigned long end, unsigned long flags) { - raw_spinlock_t *lock = &l2x0_lock; + typeof(l2x0_lock) *lock = &l2x0_lock; while (start < end) { - unsigned long blk_end = start + min(end - start, 4096UL); + unsigned long blk_end = start + min(end - start, CACHE_RANGE_ATOMIC_MAX); while (start < blk_end) { l2c_wait_mask(reg, 1); @@ -510,13 +525,13 @@ static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) { - raw_spinlock_t *lock = &l2x0_lock; + typeof(l2x0_lock) *lock = &l2x0_lock; unsigned long flags; void __iomem *base = l2x0_base; raw_spin_lock_irqsave(lock, flags); while (start < end) { - unsigned long blk_end = start + min(end - start, 4096UL); + unsigned long blk_end = start + min(end - start, CACHE_RANGE_ATOMIC_MAX); l2c_set_debug(base, 0x03); while (start < blk_end) { @@ -812,6 +827,28 @@ if (aux_val & aux_mask) pr_alert("L2C: platform provided aux values permit register corruption.\n"); + if (IS_ENABLED(CONFIG_IPIPE)) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + case L2X0_CACHE_ID_PART_L310: + if ((cache_id & L2X0_CACHE_ID_RTL_MASK) + >= L310_CACHE_ID_RTL_R3P2) { + l2x0_wa = 1; + pr_alert("L2C: I-pipe: revision >= L310-r3p2 detected, forcing WA.\n"); + } + case L2X0_CACHE_ID_PART_L220: + if (l2x0_wa < 0) { + l2x0_wa = 0; + pr_alert("L2C: 
I-pipe: l2x0_write_allocate= not specified, defaults to 0 (disabled).\n"); + } + if (!l2x0_wa) { + aux_mask &= ~L220_AUX_CTRL_FWA_MASK; + aux_val &= ~L220_AUX_CTRL_FWA_MASK; + aux_val |= 1 << L220_AUX_CTRL_FWA_SHIFT; + } else + pr_alert("L2C: I-pipe: write-allocate enabled, induces high latencies.\n"); + } + } + old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; diff -ruN linux-org/arch/arm/mm/context.c linux/arch/arm/mm/context.c --- linux-org/arch/arm/mm/context.c 2022-03-25 09:55:36.505473778 +0100 +++ linux/arch/arm/mm/context.c 2022-03-25 10:15:23.345003040 +0100 @@ -42,7 +42,7 @@ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) #define NUM_USER_ASIDS ASID_FIRST_VERSION -static DEFINE_RAW_SPINLOCK(cpu_asid_lock); +static IPIPE_DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); @@ -237,15 +237,18 @@ return asid | generation; } -void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) +int check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk, bool root_p) { unsigned long flags; - unsigned int cpu = smp_processor_id(); + unsigned int cpu = ipipe_processor_id(); u64 asid; if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) __check_vmalloc_seq(mm); +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + flags = hard_local_irq_save(); +#endif /* CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ /* * We cannot update the pgd and the ASID atomicly with classic * MMU, so switch exclusively to global mappings to avoid @@ -258,7 +261,11 @@ && atomic64_xchg(&per_cpu(active_asids, cpu), asid)) goto switch_mm_fastpath; +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + raw_spin_lock(&cpu_asid_lock); +#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ raw_spin_lock_irqsave(&cpu_asid_lock, flags); +#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ /* Check that our ASID belongs to the current generation. 
*/ asid = atomic64_read(&mm->context.id); if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) { @@ -273,8 +280,17 @@ atomic64_set(&per_cpu(active_asids, cpu), asid); cpumask_set_cpu(cpu, mm_cpumask(mm)); +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + raw_spin_unlock(&cpu_asid_lock); +#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +#endif /* CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + hard_local_irq_restore(flags); +#endif /* CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + + return 0; } diff -ruN linux-org/arch/arm/mm/fault.c linux/arch/arm/mm/fault.c --- linux-org/arch/arm/mm/fault.c 2022-03-25 09:55:36.505473778 +0100 +++ linux/arch/arm/mm/fault.c 2022-03-25 10:15:23.345003040 +0100 @@ -31,6 +31,39 @@ #ifdef CONFIG_MMU +#ifdef CONFIG_IPIPE + +static inline unsigned long ipipe_fault_entry(void) +{ + unsigned long flags; + int s; + + flags = hard_local_irq_save(); + s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); + hard_local_irq_enable(); + + return arch_mangle_irq_bits(s, flags); +} + +static inline void ipipe_fault_exit(unsigned long x) +{ + if (!arch_demangle_irq_bits(&x)) + local_irq_enable(); + else + hard_local_irq_restore(x); +} + +#else + +static inline unsigned long ipipe_fault_entry(void) +{ + return 0; +} + +static inline void ipipe_fault_exit(unsigned long x) { } + +#endif + #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) { @@ -262,10 +295,16 @@ struct mm_struct *mm; int fault, sig, code; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + unsigned long irqflags; - if (notify_page_fault(regs, fsr)) + if (__ipipe_report_trap(IPIPE_TRAP_ACCESS, regs)) return 0; + irqflags = ipipe_fault_entry(); + + if (notify_page_fault(regs, fsr)) + goto out; + tsk = current; mm = tsk->mm; @@ -318,7 +357,7 @@ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { if (!user_mode(regs)) goto no_context; - return 0; + goto out; } /* @@ -353,7 +392,7 @@ * Handle the "normal" case first - VM_FAULT_MAJOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) - return 0; + goto out; /* * If we are in kernel mode at this point, we @@ -369,7 +408,7 @@ * got oom-killed) */ pagefault_out_of_memory(); - return 0; + goto out; } if (fault & VM_FAULT_SIGBUS) { @@ -390,10 +429,13 @@ } __do_user_fault(tsk, addr, fsr, sig, code, regs); - return 0; + goto out; no_context: __do_kernel_fault(mm, addr, fsr, regs); +out: + ipipe_fault_exit(irqflags); + return 0; } #else /* CONFIG_MMU */ @@ -426,11 +468,14 @@ do_translation_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + unsigned long irqflags; unsigned int index; pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; + IPIPE_BUG_ON(!hard_irqs_disabled()); + if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); @@ -478,10 +523,19 @@ goto bad_area; copy_pmd(pmd, pmd_k); + return 0; bad_area: + if (__ipipe_report_trap(IPIPE_TRAP_ACCESS, regs)) + return 0; + + irqflags = ipipe_fault_entry(); + do_bad_area(addr, fsr, regs); + + ipipe_fault_exit(irqflags); + return 0; } #else /* CONFIG_MMU */ @@ -501,7 +555,17 @@ static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + unsigned long irqflags; + + if (__ipipe_report_trap(IPIPE_TRAP_SECTION, regs)) + return 0; + + irqflags = ipipe_fault_entry(); + do_bad_area(addr, fsr, regs); + + 
ipipe_fault_exit(irqflags); + return 0; } #endif /* CONFIG_ARM_LPAE */ @@ -512,6 +576,9 @@ static int do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + if (__ipipe_report_trap(IPIPE_TRAP_DABT,regs)) + return 0; + return 1; } @@ -549,11 +616,17 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + fsr_fs(fsr); + unsigned long irqflags; struct siginfo info; if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return; + if (__ipipe_report_trap(IPIPE_TRAP_UNKNOWN, regs)) + return; + + irqflags = ipipe_fault_entry(); + pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n", inf->name, fsr, addr); show_pte(current->mm, addr); @@ -563,6 +636,8 @@ info.si_code = inf->code; info.si_addr = (void __user *)addr; arm_notify_die("", regs, &info, fsr, 0); + + ipipe_fault_exit(irqflags); } void __init @@ -582,11 +657,17 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) { const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); + unsigned long irqflags; struct siginfo info; if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) return; + if (__ipipe_report_trap(IPIPE_TRAP_UNKNOWN, regs)) + return; + + irqflags = ipipe_fault_entry(); + pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", inf->name, ifsr, addr); @@ -595,6 +676,8 @@ info.si_code = inf->code; info.si_addr = (void __user *)addr; arm_notify_die("", regs, &info, ifsr, 0); + + ipipe_fault_exit(irqflags); } /* diff -ruN linux-org/arch/arm/mm/Kconfig linux/arch/arm/mm/Kconfig --- linux-org/arch/arm/mm/Kconfig 2022-03-25 09:55:36.501473793 +0100 +++ linux/arch/arm/mm/Kconfig 2022-03-25 10:15:23.345003040 +0100 @@ -836,6 +836,7 @@ config NEED_KUSER_HELPERS bool + default y if IPIPE config KUSER_HELPERS bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS diff -ruN linux-org/arch/arm/plat-omap/dmtimer.c linux/arch/arm/plat-omap/dmtimer.c --- linux-org/arch/arm/plat-omap/dmtimer.c 2022-03-25 09:55:36.517473732 +0100 +++ linux/arch/arm/plat-omap/dmtimer.c 2022-03-25 10:15:23.345003040 +0100 @@ -395,6 +395,18 @@ } EXPORT_SYMBOL_GPL(omap_dm_timer_get_irq); +#ifdef CONFIG_IPIPE +unsigned long omap_dm_timer_get_phys_counter_addr(struct omap_dm_timer *timer) +{ + return timer->phys_base + (OMAP_TIMER_COUNTER_REG & 0xff); +} + +unsigned long omap_dm_timer_get_virt_counter_addr(struct omap_dm_timer *timer) +{ + return (unsigned long)timer->io_base + (OMAP_TIMER_COUNTER_REG & 0xff); +} +#endif /* CONFIG_IPIPE */ + #if defined(CONFIG_ARCH_OMAP1) #include /** @@ -572,7 +584,7 @@ EXPORT_SYMBOL_GPL(omap_dm_timer_set_source); int omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload, - unsigned int load) + unsigned int load) { u32 l; @@ -599,7 +611,7 @@ /* Optimized set_load which removes costly spin wait in timer_start */ int omap_dm_timer_set_load_start(struct omap_dm_timer *timer, int autoreload, - unsigned int load) + unsigned int load) { u32 l; @@ -864,6 +876,7 @@ } timer->fclk = ERR_PTR(-ENODEV); + timer->phys_base = mem->start; timer->io_base = devm_ioremap_resource(dev, mem); if (IS_ERR(timer->io_base)) return PTR_ERR(timer->io_base); diff -ruN linux-org/arch/arm/plat-omap/include/plat/dmtimer.h linux/arch/arm/plat-omap/include/plat/dmtimer.h --- linux-org/arch/arm/plat-omap/include/plat/dmtimer.h 2022-03-25 09:55:36.517473732 +0100 +++ linux/arch/arm/plat-omap/include/plat/dmtimer.h 2022-03-25 10:15:23.345003040 +0100 @@ -104,6 +104,7 @@ int irq; struct clk *fclk; + unsigned long phys_base; void __iomem *io_base; void __iomem 
*irq_stat; /* TISR/IRQSTATUS interrupt status */ void __iomem *irq_ena; /* irq enable */ @@ -415,4 +416,9 @@ writel_relaxed(value, timer->irq_stat); } +static inline unsigned long __omap_dm_timer_read_status(struct omap_dm_timer *timer) +{ + return __raw_readl(timer->irq_stat); +} + #endif /* __ASM_ARCH_DMTIMER_H */ diff -ruN linux-org/arch/arm/vfp/entry.S linux/arch/arm/vfp/entry.S --- linux-org/arch/arm/vfp/entry.S 2022-03-25 09:55:36.529473688 +0100 +++ linux/arch/arm/vfp/entry.S 2022-03-25 10:15:23.345003040 +0100 @@ -26,6 +26,7 @@ @ ENTRY(do_vfp) inc_preempt_count r10, r4 + disable_irq_cond ldr r4, .LCvfp ldr r11, [r10, #TI_CPU] @ CPU number add r10, r10, #TI_VFPSTATE @ r10 = workspace @@ -33,6 +34,7 @@ ENDPROC(do_vfp) ENTRY(vfp_null_entry) + enable_irq dec_preempt_count_ti r10, r4 ret lr ENDPROC(vfp_null_entry) @@ -46,6 +48,7 @@ __INIT ENTRY(vfp_testing_entry) + enable_irq dec_preempt_count_ti r10, r4 ldr r0, VFP_arch_address str r0, [r0] @ set to non-zero value diff -ruN linux-org/arch/arm/vfp/vfphw.S linux/arch/arm/vfp/vfphw.S --- linux-org/arch/arm/vfp/vfphw.S 2022-03-25 09:55:36.529473688 +0100 +++ linux/arch/arm/vfp/vfphw.S 2022-03-25 10:15:23.345003040 +0100 @@ -177,6 +177,7 @@ @ out before setting an FPEXC that @ stops us reading stuff VFPFMXR FPEXC, r1 @ Restore FPEXC last + enable_irq_cond sub r2, r2, #4 @ Retry current instruction - if Thumb str r2, [sp, #S_PC] @ mode it's two 16-bit instructions, @ else it's one 32-bit instruction, so @@ -206,6 +207,7 @@ @ Fall into hand on to next handler - appropriate coproc instr @ not recognised by VFP + enable_irq_cond DBGSTR "not VFP" dec_preempt_count_ti r10, r4 ret lr diff -ruN linux-org/arch/arm/vfp/vfpmodule.c linux/arch/arm/vfp/vfpmodule.c --- linux-org/arch/arm/vfp/vfpmodule.c 2022-03-25 09:55:36.529473688 +0100 +++ linux/arch/arm/vfp/vfpmodule.c 2022-03-25 10:15:23.345003040 +0100 @@ -93,6 +93,7 @@ static void vfp_thread_flush(struct thread_info *thread) { union vfp_state *vfp = &thread->vfpstate; + unsigned long flags; unsigned int cpu; /* @@ -103,11 +104,11 @@ * Do this first to ensure that preemption won't overwrite our * state saving should access to the VFP be enabled at this point. */ - cpu = get_cpu(); + cpu = __ipipe_get_cpu(flags); if (vfp_current_hw_state[cpu] == vfp) vfp_current_hw_state[cpu] = NULL; fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); - put_cpu(); + __ipipe_put_cpu(flags); memset(vfp, 0, sizeof(union vfp_state)); @@ -122,11 +123,12 @@ { /* release case: Per-thread VFP cleanup. */ union vfp_state *vfp = &thread->vfpstate; - unsigned int cpu = get_cpu(); + unsigned long flags; + unsigned int cpu = __ipipe_get_cpu(flags); if (vfp_current_hw_state[cpu] == vfp) vfp_current_hw_state[cpu] = NULL; - put_cpu(); + __ipipe_put_cpu(flags); } static void vfp_thread_copy(struct thread_info *thread) @@ -162,6 +164,7 @@ static int vfp_notifier(struct notifier_block *self, unsigned long cmd, void *v) { struct thread_info *thread = v; + unsigned long flags; u32 fpexc; #ifdef CONFIG_SMP unsigned int cpu; @@ -169,8 +172,9 @@ switch (cmd) { case THREAD_NOTIFY_SWITCH: - fpexc = fmrx(FPEXC); + flags = hard_cond_local_irq_save(); + fpexc = fmrx(FPEXC); #ifdef CONFIG_SMP cpu = thread->cpu; @@ -191,6 +195,7 @@ * old state. 
*/ fmxr(FPEXC, fpexc & ~FPEXC_EN); + hard_cond_local_irq_restore(flags); break; case THREAD_NOTIFY_FLUSH: @@ -334,7 +339,7 @@ */ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) { - u32 fpscr, orig_fpscr, fpsid, exceptions; + u32 fpscr, orig_fpscr, fpsid, exceptions, next_trigger = 0; pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); @@ -364,6 +369,7 @@ /* * Synchronous exception, emulate the trigger instruction */ + hard_cond_local_irq_enable(); goto emulate; } @@ -376,7 +382,18 @@ trigger = fmrx(FPINST); regs->ARM_pc -= 4; #endif - } else if (!(fpexc & FPEXC_DEX)) { + if (fpexc & FPEXC_FP2V) { + /* + * The barrier() here prevents fpinst2 being read + * before the condition above. + */ + barrier(); + next_trigger = fmrx(FPINST2); + } + } + hard_cond_local_irq_enable(); + + if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) { /* * Illegal combination of bits. It can be caused by an * unallocated VFP instruction but with FPSCR.IXE set and not @@ -416,18 +433,14 @@ if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) goto exit; - /* - * The barrier() here prevents fpinst2 being read - * before the condition above. - */ - barrier(); - trigger = fmrx(FPINST2); + trigger = next_trigger; emulate: exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); if (exceptions) vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); exit: + hard_cond_local_irq_enable(); preempt_enable(); } @@ -530,7 +543,8 @@ */ void vfp_sync_hwstate(struct thread_info *thread) { - unsigned int cpu = get_cpu(); + unsigned long flags; + unsigned int cpu = __ipipe_get_cpu(flags); if (vfp_state_in_hw(cpu, thread)) { u32 fpexc = fmrx(FPEXC); @@ -544,17 +558,18 @@ fmxr(FPEXC, fpexc); } - put_cpu(); + __ipipe_put_cpu(flags); } /* Ensure that the thread reloads the hardware VFP state on the next use. */ void vfp_flush_hwstate(struct thread_info *thread) { - unsigned int cpu = get_cpu(); + unsigned long flags; + unsigned int cpu = __ipipe_get_cpu(flags); vfp_force_reload(cpu, thread); - put_cpu(); + __ipipe_put_cpu(flags); } /* diff -ruN linux-org/arch/arm/vfp/vfpmodule.c.orig linux/arch/arm/vfp/vfpmodule.c.orig --- linux-org/arch/arm/vfp/vfpmodule.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux/arch/arm/vfp/vfpmodule.c.orig 2022-03-25 10:14:30.541201939 +0100 @@ -0,0 +1,839 @@ +/* + * linux/arch/arm/vfp/vfpmodule.c + * + * Copyright (C) 2004 ARM Limited. + * Written by Deep Blue Solutions Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "vfpinstr.h" +#include "vfp.h" + +/* + * Our undef handlers (in entry.S) + */ +asmlinkage void vfp_testing_entry(void); +asmlinkage void vfp_support_entry(void); +asmlinkage void vfp_null_entry(void); + +asmlinkage void (*vfp_vector)(void) = vfp_null_entry; + +/* + * Dual-use variable. + * Used in startup: set to non-zero if VFP checks fail + * After startup, holds VFP architecture + */ +unsigned int VFP_arch; + +/* + * The pointer to the vfpstate structure of the thread which currently + * owns the context held in the VFP hardware, or NULL if the hardware + * context is invalid. + * + * For UP, this is sufficient to tell which thread owns the VFP context. 
+ * However, for SMP, we also need to check the CPU number stored in the + * saved state too to catch migrations. + */ +union vfp_state *vfp_current_hw_state[NR_CPUS]; + +/* + * Is 'thread's most up to date state stored in this CPUs hardware? + * Must be called from non-preemptible context. + */ +static bool vfp_state_in_hw(unsigned int cpu, struct thread_info *thread) +{ +#ifdef CONFIG_SMP + if (thread->vfpstate.hard.cpu != cpu) + return false; +#endif + return vfp_current_hw_state[cpu] == &thread->vfpstate; +} + +/* + * Force a reload of the VFP context from the thread structure. We do + * this by ensuring that access to the VFP hardware is disabled, and + * clear vfp_current_hw_state. Must be called from non-preemptible context. + */ +static void vfp_force_reload(unsigned int cpu, struct thread_info *thread) +{ + if (vfp_state_in_hw(cpu, thread)) { + fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); + vfp_current_hw_state[cpu] = NULL; + } +#ifdef CONFIG_SMP + thread->vfpstate.hard.cpu = NR_CPUS; +#endif +} + +/* + * Per-thread VFP initialization. + */ +static void vfp_thread_flush(struct thread_info *thread) +{ + union vfp_state *vfp = &thread->vfpstate; + unsigned int cpu; + + /* + * Disable VFP to ensure we initialize it first. We must ensure + * that the modification of vfp_current_hw_state[] and hardware + * disable are done for the same CPU and without preemption. + * + * Do this first to ensure that preemption won't overwrite our + * state saving should access to the VFP be enabled at this point. + */ + cpu = get_cpu(); + if (vfp_current_hw_state[cpu] == vfp) + vfp_current_hw_state[cpu] = NULL; + fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); + put_cpu(); + + memset(vfp, 0, sizeof(union vfp_state)); + + vfp->hard.fpexc = FPEXC_EN; + vfp->hard.fpscr = FPSCR_ROUND_NEAREST; +#ifdef CONFIG_SMP + vfp->hard.cpu = NR_CPUS; +#endif +} + +static void vfp_thread_exit(struct thread_info *thread) +{ + /* release case: Per-thread VFP cleanup. */ + union vfp_state *vfp = &thread->vfpstate; + unsigned int cpu = get_cpu(); + + if (vfp_current_hw_state[cpu] == vfp) + vfp_current_hw_state[cpu] = NULL; + put_cpu(); +} + +static void vfp_thread_copy(struct thread_info *thread) +{ + struct thread_info *parent = current_thread_info(); + + vfp_sync_hwstate(parent); + thread->vfpstate = parent->vfpstate; +#ifdef CONFIG_SMP + thread->vfpstate.hard.cpu = NR_CPUS; +#endif +} + +/* + * When this function is called with the following 'cmd's, the following + * is true while this function is being run: + * THREAD_NOFTIFY_SWTICH: + * - the previously running thread will not be scheduled onto another CPU. + * - the next thread to be run (v) will not be running on another CPU. + * - thread->cpu is the local CPU number + * - not preemptible as we're called in the middle of a thread switch + * THREAD_NOTIFY_FLUSH: + * - the thread (v) will be running on the local CPU, so + * v === current_thread_info() + * - thread->cpu is the local CPU number at the time it is accessed, + * but may change at any time. + * - we could be preempted if tree preempt rcu is enabled, so + * it is unsafe to use thread->cpu. + * THREAD_NOTIFY_EXIT + * - we could be preempted if tree preempt rcu is enabled, so + * it is unsafe to use thread->cpu. 
+ */ +static int vfp_notifier(struct notifier_block *self, unsigned long cmd, void *v) +{ + struct thread_info *thread = v; + u32 fpexc; +#ifdef CONFIG_SMP + unsigned int cpu; +#endif + + switch (cmd) { + case THREAD_NOTIFY_SWITCH: + fpexc = fmrx(FPEXC); + +#ifdef CONFIG_SMP + cpu = thread->cpu; + + /* + * On SMP, if VFP is enabled, save the old state in + * case the thread migrates to a different CPU. The + * restoring is done lazily. + */ + if ((fpexc & FPEXC_EN) && vfp_current_hw_state[cpu]) { + /* vfp_save_state oopses on VFP11 if EX bit set */ + fmxr(FPEXC, fpexc & ~FPEXC_EX); + vfp_save_state(vfp_current_hw_state[cpu], fpexc); + } +#endif + + /* + * Always disable VFP so we can lazily save/restore the + * old state. + */ + fmxr(FPEXC, fpexc & ~FPEXC_EN); + break; + + case THREAD_NOTIFY_FLUSH: + vfp_thread_flush(thread); + break; + + case THREAD_NOTIFY_EXIT: + vfp_thread_exit(thread); + break; + + case THREAD_NOTIFY_COPY: + vfp_thread_copy(thread); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block vfp_notifier_block = { + .notifier_call = vfp_notifier, +}; + +/* + * Raise a SIGFPE for the current process. + * sicode describes the signal being raised. + */ +static void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) +{ + siginfo_t info; + + memset(&info, 0, sizeof(info)); + + info.si_signo = SIGFPE; + info.si_code = sicode; + info.si_addr = (void __user *)(instruction_pointer(regs) - 4); + + /* + * This is the same as NWFPE, because it's not clear what + * this is used for + */ + current->thread.error_code = 0; + current->thread.trap_no = 6; + + send_sig_info(SIGFPE, &info, current); +} + +static void vfp_panic(char *reason, u32 inst) +{ + int i; + + pr_err("VFP: Error: %s\n", reason); + pr_err("VFP: EXC 0x%08x SCR 0x%08x INST 0x%08x\n", + fmrx(FPEXC), fmrx(FPSCR), inst); + for (i = 0; i < 32; i += 2) + pr_err("VFP: s%2u: 0x%08x s%2u: 0x%08x\n", + i, vfp_get_float(i), i+1, vfp_get_float(i+1)); +} + +/* + * Process bitmask of exception conditions. + */ +static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs) +{ + int si_code = 0; + + pr_debug("VFP: raising exceptions %08x\n", exceptions); + + if (exceptions == VFP_EXCEPTION_ERROR) { + vfp_panic("unhandled bounce", inst); + vfp_raise_sigfpe(0, regs); + return; + } + + /* + * If any of the status flags are set, update the FPSCR. + * Comparison instructions always return at least one of + * these flags set. + */ + if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) + fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V); + + fpscr |= exceptions; + + fmxr(FPSCR, fpscr); + +#define RAISE(stat,en,sig) \ + if (exceptions & stat && fpscr & en) \ + si_code = sig; + + /* + * These are arranged in priority order, least to highest. + */ + RAISE(FPSCR_DZC, FPSCR_DZE, FPE_FLTDIV); + RAISE(FPSCR_IXC, FPSCR_IXE, FPE_FLTRES); + RAISE(FPSCR_UFC, FPSCR_UFE, FPE_FLTUND); + RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF); + RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV); + + if (si_code) + vfp_raise_sigfpe(si_code, regs); +} + +/* + * Emulate a VFP instruction. 
+ */ +static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) +{ + u32 exceptions = VFP_EXCEPTION_ERROR; + + pr_debug("VFP: emulate: INST=0x%08x SCR=0x%08x\n", inst, fpscr); + + if (INST_CPRTDO(inst)) { + if (!INST_CPRT(inst)) { + /* + * CPDO + */ + if (vfp_single(inst)) { + exceptions = vfp_single_cpdo(inst, fpscr); + } else { + exceptions = vfp_double_cpdo(inst, fpscr); + } + } else { + /* + * A CPRT instruction can not appear in FPINST2, nor + * can it cause an exception. Therefore, we do not + * have to emulate it. + */ + } + } else { + /* + * A CPDT instruction can not appear in FPINST2, nor can + * it cause an exception. Therefore, we do not have to + * emulate it. + */ + } + return exceptions & ~VFP_NAN_FLAG; +} + +/* + * Package up a bounce condition. + */ +void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) +{ + u32 fpscr, orig_fpscr, fpsid, exceptions; + + pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); + + /* + * At this point, FPEXC can have the following configuration: + * + * EX DEX IXE + * 0 1 x - synchronous exception + * 1 x 0 - asynchronous exception + * 1 x 1 - sychronous on VFP subarch 1 and asynchronous on later + * 0 0 1 - synchronous on VFP9 (non-standard subarch 1 + * implementation), undefined otherwise + * + * Clear various bits and enable access to the VFP so we can + * handle the bounce. + */ + fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK)); + + fpsid = fmrx(FPSID); + orig_fpscr = fpscr = fmrx(FPSCR); + + /* + * Check for the special VFP subarch 1 and FPSCR.IXE bit case + */ + if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT) + && (fpscr & FPSCR_IXE)) { + /* + * Synchronous exception, emulate the trigger instruction + */ + goto emulate; + } + + if (fpexc & FPEXC_EX) { +#ifndef CONFIG_CPU_FEROCEON + /* + * Asynchronous exception. The instruction is read from FPINST + * and the interrupted instruction has to be restarted. + */ + trigger = fmrx(FPINST); + regs->ARM_pc -= 4; +#endif + } else if (!(fpexc & FPEXC_DEX)) { + /* + * Illegal combination of bits. It can be caused by an + * unallocated VFP instruction but with FPSCR.IXE set and not + * on VFP subarch 1. + */ + vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); + goto exit; + } + + /* + * Modify fpscr to indicate the number of iterations remaining. + * If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates + * whether FPEXC.VECITR or FPSCR.LEN is used. + */ + if (fpexc & (FPEXC_EX | FPEXC_VV)) { + u32 len; + + len = fpexc + (1 << FPEXC_LENGTH_BIT); + + fpscr &= ~FPSCR_LENGTH_MASK; + fpscr |= (len & FPEXC_LENGTH_MASK) << (FPSCR_LENGTH_BIT - FPEXC_LENGTH_BIT); + } + + /* + * Handle the first FP instruction. We used to take note of the + * FPEXC bounce reason, but this appears to be unreliable. + * Emulate the bounced instruction instead. + */ + exceptions = vfp_emulate_instruction(trigger, fpscr, regs); + if (exceptions) + vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); + + /* + * If there isn't a second FP instruction, exit now. Note that + * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. + */ + if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) + goto exit; + + /* + * The barrier() here prevents fpinst2 being read + * before the condition above. 
+ */ + barrier(); + trigger = fmrx(FPINST2); + + emulate: + exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); + if (exceptions) + vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); + exit: + preempt_enable(); +} + +static void vfp_enable(void *unused) +{ + u32 access; + + BUG_ON(preemptible()); + access = get_copro_access(); + + /* + * Enable full access to VFP (cp10 and cp11) + */ + set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11)); +} + +/* Called by platforms on which we want to disable VFP because it may not be + * present on all CPUs within a SMP complex. Needs to be called prior to + * vfp_init(). + */ +void vfp_disable(void) +{ + if (VFP_arch) { + pr_debug("%s: should be called prior to vfp_init\n", __func__); + return; + } + VFP_arch = 1; +} + +#ifdef CONFIG_CPU_PM +static int vfp_pm_suspend(void) +{ + struct thread_info *ti = current_thread_info(); + u32 fpexc = fmrx(FPEXC); + + /* if vfp is on, then save state for resumption */ + if (fpexc & FPEXC_EN) { + pr_debug("%s: saving vfp state\n", __func__); + /* vfp_save_state oopses on VFP11 if EX bit set */ + fmxr(FPEXC, fpexc & ~FPEXC_EX); + vfp_save_state(&ti->vfpstate, fpexc); + + /* disable, just in case */ + fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); + } else if (vfp_current_hw_state[ti->cpu]) { +#ifndef CONFIG_SMP + /* vfp_save_state oopses on VFP11 if EX bit set */ + fmxr(FPEXC, (fpexc & ~FPEXC_EX) | FPEXC_EN); + vfp_save_state(vfp_current_hw_state[ti->cpu], fpexc); + fmxr(FPEXC, fpexc); +#endif + } + + /* clear any information we had about last context state */ + vfp_current_hw_state[ti->cpu] = NULL; + + return 0; +} + +static void vfp_pm_resume(void) +{ + /* ensure we have access to the vfp */ + vfp_enable(NULL); + + /* and disable it to ensure the next usage restores the state */ + fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); +} + +static int vfp_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + switch (cmd) { + case CPU_PM_ENTER: + vfp_pm_suspend(); + break; + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + vfp_pm_resume(); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block vfp_cpu_pm_notifier_block = { + .notifier_call = vfp_cpu_pm_notifier, +}; + +static void vfp_pm_init(void) +{ + cpu_pm_register_notifier(&vfp_cpu_pm_notifier_block); +} + +#else +static inline void vfp_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + +/* + * Ensure that the VFP state stored in 'thread->vfpstate' is up to date + * with the hardware state. + */ +void vfp_sync_hwstate(struct thread_info *thread) +{ + unsigned int cpu = get_cpu(); + + if (vfp_state_in_hw(cpu, thread)) { + u32 fpexc = fmrx(FPEXC); + + /* + * Save the last VFP state on this CPU. + */ + /* vfp_save_state oopses on VFP11 if EX bit set */ + fmxr(FPEXC, (fpexc & ~FPEXC_EX) | FPEXC_EN); + vfp_save_state(&thread->vfpstate, fpexc | FPEXC_EN); + fmxr(FPEXC, fpexc); + } + + put_cpu(); +} + +/* Ensure that the thread reloads the hardware VFP state on the next use. */ +void vfp_flush_hwstate(struct thread_info *thread) +{ + unsigned int cpu = get_cpu(); + + vfp_force_reload(cpu, thread); + + put_cpu(); +} + +/* + * Save the current VFP state into the provided structures and prepare + * for entry into a new function (signal handler). + */ +int vfp_preserve_user_clear_hwstate(struct user_vfp __user *ufp, + struct user_vfp_exc __user *ufp_exc) +{ + struct thread_info *thread = current_thread_info(); + struct vfp_hard_struct *hwstate = &thread->vfpstate.hard; + int err = 0; + + /* Ensure that the saved hwstate is up-to-date. 
*/ + vfp_sync_hwstate(thread); + + /* + * Copy the floating point registers. There can be unused + * registers see asm/hwcap.h for details. + */ + err |= __copy_to_user(&ufp->fpregs, &hwstate->fpregs, + sizeof(hwstate->fpregs)); + /* + * Copy the status and control register. + */ + __put_user_error(hwstate->fpscr, &ufp->fpscr, err); + + /* + * Copy the exception registers. + */ + __put_user_error(hwstate->fpexc, &ufp_exc->fpexc, err); + __put_user_error(hwstate->fpinst, &ufp_exc->fpinst, err); + __put_user_error(hwstate->fpinst2, &ufp_exc->fpinst2, err); + + if (err) + return -EFAULT; + + /* Ensure that VFP is disabled. */ + vfp_flush_hwstate(thread); + + /* + * As per the PCS, clear the length and stride bits for function + * entry. + */ + hwstate->fpscr &= ~(FPSCR_LENGTH_MASK | FPSCR_STRIDE_MASK); + return 0; +} + +/* Sanitise and restore the current VFP state from the provided structures. */ +int vfp_restore_user_hwstate(struct user_vfp __user *ufp, + struct user_vfp_exc __user *ufp_exc) +{ + struct thread_info *thread = current_thread_info(); + struct vfp_hard_struct *hwstate = &thread->vfpstate.hard; + unsigned long fpexc; + int err = 0; + u32 fpsid = fmrx(FPSID); + + /* Disable VFP to avoid corrupting the new thread state. */ + vfp_flush_hwstate(thread); + + /* + * Copy the floating point registers. There can be unused + * registers see asm/hwcap.h for details. + */ + err |= __copy_from_user(&hwstate->fpregs, &ufp->fpregs, + sizeof(hwstate->fpregs)); + /* + * Copy the status and control register. + */ + __get_user_error(hwstate->fpscr, &ufp->fpscr, err); + + /* + * Sanitise and restore the exception registers. + */ + __get_user_error(fpexc, &ufp_exc->fpexc, err); + + /* Ensure the VFP is enabled. */ + fpexc |= FPEXC_EN; + + /* Mask FPXEC_EX and FPEXC_FP2V if not required by VFP arch */ + if ((fpsid & FPSID_ARCH_MASK) != (1 << FPSID_ARCH_BIT)) { + /* Ensure FPINST2 is invalid and the exception flag is cleared. */ + fpexc &= ~(FPEXC_EX | FPEXC_FP2V); + } + + hwstate->fpexc = fpexc; + + __get_user_error(hwstate->fpinst, &ufp_exc->fpinst, err); + __get_user_error(hwstate->fpinst2, &ufp_exc->fpinst2, err); + + return err ? -EFAULT : 0; +} + +/* + * VFP hardware can lose all context when a CPU goes offline. + * As we will be running in SMP mode with CPU hotplug, we will save the + * hardware state at every thread switch. We clear our held state when + * a CPU has been killed, indicating that the VFP hardware doesn't contain + * a threads VFP state. When a CPU starts up, we re-enable access to the + * VFP hardware. The callbacks below are called on the CPU which + * is being offlined/onlined. + */ +static int vfp_dying_cpu(unsigned int cpu) +{ + vfp_current_hw_state[cpu] = NULL; + return 0; +} + +static int vfp_starting_cpu(unsigned int unused) +{ + vfp_enable(NULL); + return 0; +} + +void vfp_kmode_exception(void) +{ + /* + * If we reach this point, a floating point exception has been raised + * while running in kernel mode. If the NEON/VFP unit was enabled at the + * time, it means a VFP instruction has been issued that requires + * software assistance to complete, something which is not currently + * supported in kernel mode. + * If the NEON/VFP unit was disabled, and the location pointed to below + * is properly preceded by a call to kernel_neon_begin(), something has + * caused the task to be scheduled out and back in again. In this case, + * rebuilding and running with CONFIG_DEBUG_ATOMIC_SLEEP enabled should + * be helpful in localizing the problem. 
+ */ + if (fmrx(FPEXC) & FPEXC_EN) + pr_crit("BUG: unsupported FP instruction in kernel mode\n"); + else + pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n"); +} + +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin(void) +{ + struct thread_info *thread = current_thread_info(); + unsigned int cpu; + u32 fpexc; + + /* + * Kernel mode NEON is only allowed outside of interrupt context + * with preemption disabled. This will make sure that the kernel + * mode NEON register contents never need to be preserved. + */ + BUG_ON(in_interrupt()); + cpu = get_cpu(); + + fpexc = fmrx(FPEXC) | FPEXC_EN; + /* vfp_save_state oopses on VFP11 if EX bit set */ + fmxr(FPEXC, fpexc & ~FPEXC_EX); + + /* + * Save the userland NEON/VFP state. Under UP, + * the owner could be a task other than 'current' + */ + if (vfp_state_in_hw(cpu, thread)) + vfp_save_state(&thread->vfpstate, fpexc); +#ifndef CONFIG_SMP + else if (vfp_current_hw_state[cpu] != NULL) + vfp_save_state(vfp_current_hw_state[cpu], fpexc); +#endif + vfp_current_hw_state[cpu] = NULL; +} +EXPORT_SYMBOL(kernel_neon_begin); + +void kernel_neon_end(void) +{ + /* Disable the NEON/VFP unit. */ + fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); + put_cpu(); +} +EXPORT_SYMBOL(kernel_neon_end); + +#endif /* CONFIG_KERNEL_MODE_NEON */ + +/* + * VFP support code initialisation. + */ +static int __init vfp_init(void) +{ + unsigned int vfpsid; + unsigned int cpu_arch = cpu_architecture(); + + /* + * Enable the access to the VFP on all online CPUs so the + * following test on FPSID will succeed. + */ + if (cpu_arch >= CPU_ARCH_ARMv6) + on_each_cpu(vfp_enable, NULL, 1); + + /* + * First check that there is a VFP that we can use. + * The handler is already setup to just log calls, so + * we just need to read the VFPSID register. + */ + vfp_vector = vfp_testing_entry; + barrier(); + vfpsid = fmrx(FPSID); + barrier(); + vfp_vector = vfp_null_entry; + + pr_info("VFP support v0.3: "); + if (VFP_arch) { + pr_cont("not present\n"); + return 0; + /* Extract the architecture on CPUID scheme */ + } else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) { + VFP_arch = vfpsid & FPSID_CPUID_ARCH_MASK; + VFP_arch >>= FPSID_ARCH_BIT; + /* + * Check for the presence of the Advanced SIMD + * load/store instructions, integer and single + * precision floating point operations. Only check + * for NEON if the hardware has the MVFR registers. + */ + if (IS_ENABLED(CONFIG_NEON) && + (fmrx(MVFR1) & 0x000fff00) == 0x00011100) + elf_hwcap |= HWCAP_NEON; + + if (IS_ENABLED(CONFIG_VFPv3)) { + u32 mvfr0 = fmrx(MVFR0); + if (((mvfr0 & MVFR0_DP_MASK) >> MVFR0_DP_BIT) == 0x2 || + ((mvfr0 & MVFR0_SP_MASK) >> MVFR0_SP_BIT) == 0x2) { + elf_hwcap |= HWCAP_VFPv3; + /* + * Check for VFPv3 D16 and VFPv4 D16. CPUs in + * this configuration only have 16 x 64bit + * registers. 
+ */ + if ((mvfr0 & MVFR0_A_SIMD_MASK) == 1) + /* also v4-D16 */ + elf_hwcap |= HWCAP_VFPv3D16; + else + elf_hwcap |= HWCAP_VFPD32; + } + + if ((fmrx(MVFR1) & 0xf0000000) == 0x10000000) + elf_hwcap |= HWCAP_VFPv4; + } + /* Extract the architecture version on pre-cpuid scheme */ + } else { + if (vfpsid & FPSID_NODOUBLE) { + pr_cont("no double precision support\n"); + return 0; + } + + VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; + } + + cpuhp_setup_state_nocalls(CPUHP_AP_ARM_VFP_STARTING, + "arm/vfp:starting", vfp_starting_cpu, + vfp_dying_cpu); + + vfp_vector = vfp_support_entry; + + thread_register_notifier(&vfp_notifier_block); + vfp_pm_init(); + + /* + * We detected VFP, and the support code is + * in place; report VFP support to userspace. + */ + elf_hwcap |= HWCAP_VFP; + + pr_cont("implementor %02x architecture %d part %02x variant %x rev %x\n", + (vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT, + VFP_arch, + (vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT, + (vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT, + (vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT); + + return 0; +} + +core_initcall(vfp_init); diff -ruN linux-org/Documentation/ipipe-arm.rst linux/Documentation/ipipe-arm.rst --- linux-org/Documentation/ipipe-arm.rst 1970-01-01 01:00:00.000000000 +0100 +++ linux/Documentation/ipipe-arm.rst 2022-03-25 10:15:23.353003010 +0100 @@ -0,0 +1,833 @@ +.. include:: + +=============================================== +Porting the interrupt pipeline to a new ARM SoC +=============================================== + +:Copyright: |copy| 2014: Gilles Chanteperdrix +:Copyright: |copy| 2018: Philippe Gerum + +Purpose +======= + +This document is an adaptation of the original article [#f1]_ by +Gilles Chanteperdrix for the Xenomai project, detailing the changes +introduced by the interrupt pipeline into the ARM kernel. + +It is aimed at guiding you through the task of porting the I-pipe core +to a new ARM SoC, for interfacing a co-kernel such as Xenomai with +Linux, in order to implement a real-time, dual kernel system. + +.. [#f1] "Porting Xenomai dual kernel to a new ARM SoC" + https://xenomai.org/2014/09/porting-xenomai-dual-kernel-to-a-new-arm-soc/ + +Terminology +----------- + +If you are reading this document, chances are you want to get the +I-pipe to run on an ARM based *board*. Examples of **board**s are +*beagleboard*, *beaglebone*, *raspberry pi*. + +This board uses an ARM based *SoC*. Examples of **SoC**s are Atmel +AT91RM9200, Atmel AT91SAM9263, TI OMAP3530, TI OMAP4430, Freescale +IMX53. We use *SoC family* to loosely designate groups of SoCs which +have so many peripherals in common that peripheral support code is +shared between them. For instance, there is an "AT91" family, +including the AT91RM9200 and AT91SAM9263 SoCs, and several others. + +This SoC is based on a processor *core* implementing the ARM +instruction set, examples of such **core**s are ARM 926EJ-S, +Intel/Marvell Xscale, Marvell Feroceon, ARM Cortex A8, ARM Cortex A9. + +Finally, this processor core implements an ARM *architecture*, sort of +revision of the ARM instruction set. Examples of ARM **architecture**s +are armv4, armv5, armv6 and armv7. + +So, for instance, the IGEPv2 *board* uses the TI OMAP3530 *SoC*, +member of the OMAP *SoC family*, based on the ARM Cortex A8 *core*, +implementing the armv7 *architecture*. + +.. CAUTION: Starting from kernel 4.14, the I-pipe does not support + armv4 or armv5 architectures anymore, but only armv6 onward. 
+ +Locate the ARM-specific code to port +------------------------------------ + +Initially, you should identify what are the SoC, processor core and +architecture of the SoC used by your board, then locate the +corresponding SoC and board-specific code. + +In order to figure out such information, you can use the Linux kernel +Kconfig and Makefiles in various sub-directories in the Linux kernel +sources. Linux code specific to an ARM based SoC or SoC family X is +located in arch/arm/mach-X or arch/arm/plat-X, some code may also live +in the drivers/ directory, typically in drivers/clocksource, +drivers/gpio or drivers/irqchip. + +Some devices managed by the I-pipe core (hardware timer, high +resolution counter, interrupt controller, GPIO controller) may be +specific to each SoC, and has to be adapted to run with the I-pipe +core. + +.. NOTE: If the processor core is an ARM Cortex A9, things are going +to be a bit easier, as such core contains an interrupt controller, a +hardware timer and a high resolution counter, for which drivers have +already been ported to the I-pipe. + +.. _`hardware-timer`: +Hardware timer +-------------- + +A hardware timer programmable for ticking in one-shot mode is required +by I-pipe clients such as co-kernels. Support for such timer is +abstracted by the I-pipe core in ``struct ipipe_timer``. + +For most ARM SoCs, the hardware timer details are specific to each SoC +or SoC family, therefore such ``ipipe_timer`` descriptor must be added +on a SoC per SoC basis. There are several ways, to implement this +timer descriptor in the I-pipe core. + +.. _`A9-timer`: +The Cortex-A9 case +~~~~~~~~~~~~~~~~~~ + +If the SoC you use is not based on the ARM Cortex A9 core, skip to the +:ref:`non-A9-timer `. In case of SoCs based on the ARM +Cortex A9 core, the hardware timer is provided by the processor core, +and not specific to the SoC: the good news is that the timer code has +already been modified to implement the ``struct ipipe_timer`` +descriptor into the I-pipe core (arch/arm/kernel/smp_twd.c). However, +you should make sure that the Linux kernel compiles and uses the ARM +Cortex A9 hardware timer code when compiled for your SoC. + +To this end, you should make sure that the ``smp_twd`` timer is +registered. You should make sure it declares a clock source with a +*compatible* string containing *twd-timer*. + +If the SoC does not use the ``smp_twd`` timer and there is no kernel +configuration option allowing to select it, you will have to register +per-cpu timers using :ref:`non-A9-timer `. + +.. NOTE: In some cases, the Linux support code for the Cortex A9 timer +may give imprecise timer frequency calibration results when I-pipe +updates are patched in, resulting in early shots. With proper device +tree support for the board, the proper clock frequency may be +determined automatically by the driver without resorting to any +imprecise calibration. + +.. _`non-A9-timer` +The general case +~~~~~~~~~~~~~~~~ + +You should look for the hardware timer support code for your SoC. +Usually, this may be found in drivers/clocksource or +arch/arm/mach-X/time.c or arch/arm/plat-Y/time.c. Assuming your board +uses a device tree file, you should look for a device with a +compatible string containing ``-timer`` and try and find the +corresponding file in one of the places mentioned above. 
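+As an illustration of what to look for, a one-shot capable timer
+driver typically declares a ``clock_event_device`` along the following
+lines. This is only a simplified sketch for a hypothetical "foo" SoC:
+the ``foo_*`` names and registers are made up for the example and do
+not refer to an actual driver.
+
+-------------------------------------------------------------------------------
+ /* Hypothetical sketch, not taken from an existing driver. */
+ static void __iomem *foo_timer_base;
+
+ static int foo_timer_set_next_event(unsigned long cycles,
+                                     struct clock_event_device *evt)
+ {
+         /* Match register = current counter value + requested delay. */
+         writel_relaxed(readl_relaxed(foo_timer_base + FOO_COUNTER) + cycles,
+                        foo_timer_base + FOO_MATCH);
+         return 0;
+ }
+
+ static struct clock_event_device foo_clockevent = {
+         .name           = "foo_timer",
+         .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+         .rating         = 300,
+         .set_next_event = foo_timer_set_next_event,
+ };
+-------------------------------------------------------------------------------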
+ +Assuming the hardware timer is driven by the ``clock_event_device`` +infrastructure, and provides support for the one-shot mode (the +``features`` member of the clock_event_device structure contains +``CLOCK_EVT_FEAT_ONESHOT``), your job will be easy. Otherwise, you +should find the SoC data-sheet or reference guide containing the +documentation for the hardware timer registers, and try to find out +what type it is (decrementer or free-running counter with match +register), and how to use it in one-shot mode. + +You have to decide finally if you choose to share the hardware timer +used by Linux with the co-kernel, or if you are going to use a +different hardware timer (some SoC have several hardware timers +available). As a rule of thumb, it is better to use the same timer. + +The ``ipipe_timer`` structure somewhat piggybacks on the +``clock_event_device`` structure, adding a set of capabilities +required by co-kernels for receiving high-precision events from the +timer hardware via the interrupt pipeline. It is defined in +include/linux/ipipe_tickdev.h contains the following members: + +* `int irq` + +This is the IRQ number used for the timer interrupt. Providing it is +mandatory. + +* `void (*request)(struct ipipe_timer *timer, int steal)` + +This callback is invoked by the I-pipe core when the co-kernel starts +using the hardware timer. It should set the hardware timer to one-shot +mode. The ``steal`` parameter is true if the co-kernel is taking +control over the timer currently in use by Linux. + +If the hardware timer support code for Linux uses the +``clock_event_device`` infrastructure, supports one-shot mode, and the +I-pipe core is going to use the same timer as Linux, this handler may +be omitted. In such a case, the I-pipe core is going to call the +default ``set_mode`` handler defined by the corresponding +``clock_event_device`` structure. + +* `int (*set)(unsigned long ticks, void *timer)` + +This handler is called by the I-pipe core each time the co-kernel asks +for programming the next event into the hardware timer registers. It +should program the hardware timer to elapse in ``ticks`` in hardware +time unit. + +For instance, if the hardware timer is based on a decrementer, this +handler should set the decrementer register with the ``ticks`` +value. + +If the hardware timer is based on a free-running counter and a match +register instead, this handler should set the match register to the +sum of the current value of the free-running counter and the ``ticks`` +parameter. + +This function should return 0 upon success or a negative value if the +delay is too short (in case of a free-running counter and a match +register, this can be detected by re-reading the free-running counter +after having programmed the match register, if the free-running +counter has now passed the match register value, the delay was too +short, and the programming may have failed). + +If the hardware timer support code for Linux uses the +``clock_event_device`` infrastructure, supports one-shot mode, and the +I-pipe core is going to use the same timer as Linux, this handler may +be omitted. In such a case, the I-pipe core is going to call the +default ``set_next_event`` handler defined by the corresponding +``clock_event_device`` structure. + +.. CAUTION: Care must be taken however that this handler is called +from co-kernel context, therefore it may neither call any regular +Linux services, nor hold regular spinlocks. 
Otherwise, a separate +handler must be implemented (or if a spinlock has to be held, the +original spinlock should be turned into an :ref:`hard-spinlocks +`, provided the critical sections being covered by +such lock are short). + +* `void (*ack)(void)` + +This handler is called by the I-pipe core upon timer interrupt, and it +should acknowledge the timer interrupt at hardware timer level. It is +almost always necessary to provide such handler. + +If the hardware timer is shared with Linux, the code implementing the +proper hardware acknowledge code is generally contained in the Linux +timer interrupt. This interrupt code should be modified to only +acknowledge the timer interrupt if the timer is not controlled by the +co-kernel. See the :ref:`Example ` for a way to do this +avoiding code duplication of the timer acknowledgement code. + +* `void (*release)(struct ipipe_timer *timer)` + +This handler is called by the I-pipe core when the co-kernel releases +the hardware timer. It should restore the timer to its state at the +time when the ``request`` handler was called. For instance, if the +timer was running in periodic mode, and the ``request`` handler +switched it to one-shot mode, this handler should turn it back to +periodic mode. + +If the hardware timer support code for Linux uses the +``clock_event_device`` infrastructure, supports one-shot mode, and the +I-pipe core is going to use the same timer as Linux, this handler may +be omitted. In such a case, the I-pipe core is going to call the +default ``set_mode`` handler defined by the corresponding +``clock_event_device`` structure. + +* `const char *name` + +Name of the timer. + +If the I-pipe core is going to use the same timer as Linux, this +setting may be omitted, in which case the name defined by the +``clock_event_device`` descriptor for such timer will be used. + +* `unsigned int rating` + +Rating of the timer. If support for several hardware timers is +provided with different ratings, the one with the highest rating will +be used by the co-kernel. + +If the I-pipe core is going to use the same timer as Linux, this +setting may be omitted, in which case the rating defined by the +``clock_event_device`` descriptor for such timer will be used. + +* `unsigned long freq` + +Frequency of the hardware timer. Usually, this value can be obtained +from the clock framework (``clk_get_rate()``). + +If the I-pipe core is going to use the same timer as Linux, this +setting may be omitted, in which case the frequency defined by the +``clock_event_device`` descriptor for such timer will be used. + +* `unsigned int min_delay_ticks` + +The hardware timer minimum delay as a count of ticks. Almost all timers +based on free-running counters and match register have a threshold below +which they can not be programmed. When you program such a timer with a +too short value, the free-running counter will need to wrap before it +matches the match register again, so the timer will appear to be stopped +for a long time, then suddenly restart. + +In case when this minimum delay is known as a wallclock delay instead +of a count of hardware ticks, ``ipipe_timer_ns2ticks()`` can be used +to convert values, making sure the ``ipipe_timer.freq`` has been set +prior to that call. + +If the I-pipe core is going to use the same timer as Linux, this +setting may be omitted, in which case the delay defined by the +``clock_event_device`` descriptor for such timer will be used. 
+ +* `const struct cpumask *cpumask` + +A cpumask containing the set of cpus where this timer will be run. On +SMP systems, there should be several ``ipipe_timer`` structures +defined, each with only one cpu in the ``cpumask`` member. + +If the I-pipe core is going to use the same timer as Linux, this +setting may be omitted, in which case the mask defined by the +``clock_event_device`` descriptor for such timer will be used. + +Once this structure is defined, there are two ways to register it to the +I-pipe core: + +* if the hardware timer support code for Linux uses the +``clock_event_device`` infrastructure and the I-pipe core is going to +use the same hardware timer as Linux, the member ``ipipe_timer`` of +the ``clock_event_device`` descriptor should be set to point at this +structure, causing an automatic registration of both descriptors when +``clockevents_register_device()`` is called by the regular kernel +code. + +* otherwise, the ``ipipe_timer_register()`` service should be called +for registering the descriptor individually. + +.. _example: +Example +~~~~~~~ + +As an example, let us look at the OMAP3 code in the I-pipe core. +Previous to the introduction of the I-pipe bits, the code looked like: + +------------------------------------------------------------------------------- + static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id) + { + struct clock_event_device *evt = &clockevent_gpt; + + __omap_dm_timer_write_status(&clkev, OMAP_TIMER_INT_OVERFLOW); + + evt->event_handler(evt); + return IRQ_HANDLED; + } +------------------------------------------------------------------------------- + +The call to ``__omap_dm_timer_write_status()`` acknowledges the +interrupt hardware timer level. + +------------------------------------------------------------------------------- + static struct clock_event_device clockevent_gpt = { + .name = "gp timer", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .shift = 32, + .set_next_event = omap2_gp_timer_set_next_event, + .set_mode = omap2_gp_timer_set_mode, + }; +------------------------------------------------------------------------------- + +This shows that the Linux hardware timer support code handles one-shot +mode, and closer inspection reveals that +``omap2_gp_timer_set_next_event()`` does not call any Linux service +which could not be called from out-of-band context. Therefore, this +implementation can be safely shared with the co-kernel. 
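+For reference, the programming handler mentioned above boils down to a
+single low-level register write, which is why it is safe to call from
+out-of-band context. The sketch below is slightly abridged; refer to
+arch/arm/mach-omap2/timer.c in your kernel tree for the authoritative
+version:
+
+-------------------------------------------------------------------------------
+ static int omap2_gp_timer_set_next_event(unsigned long cycles,
+                                          struct clock_event_device *evt)
+ {
+         /*
+          * Count up from 0xffffffff - cycles, so the timer overflows
+          * (and interrupts) in 'cycles' ticks.
+          */
+         __omap_dm_timer_load_start(&clkev, OMAP_TIMER_CTRL_ST,
+                                    0xffffffff - cycles, OMAP_TIMER_POSTED);
+         return 0;
+ }
+-------------------------------------------------------------------------------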
The I-pipe +core modifies this code in the following way: + +------------------------------------------------------------------------------- + static void omap2_gp_timer_ack(void) + { + __omap_dm_timer_write_status(&clkev, OMAP_TIMER_INT_OVERFLOW); + } + + static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id) + { + struct clock_event_device *evt = &clockevent_gpt; + + if (!clockevent_ipipe_stolen(evt)) + omap2_gp_timer_ack(); + + evt->event_handler(evt); + return IRQ_HANDLED; + } + + #ifdef CONFIG_IPIPE + static struct ipipe_timer omap_shared_itimer = { + .ack = omap2_gp_timer_ack, + .min_delay_ticks = 3, + }; + #endif /* CONFIG_IPIPE */ + + static struct clock_event_device clockevent_gpt = { + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .rating = 300, + .set_next_event = omap2_gp_timer_set_next_event, + .set_state_shutdown = omap2_gp_timer_shutdown, + .set_state_periodic = omap2_gp_timer_set_periodic, + .set_state_oneshot = omap2_gp_timer_shutdown, + .tick_resume = omap2_gp_timer_shutdown, + }; + + static void __init omap2_gp_clockevent_init(int gptimer_id, + const char *fck_source) + { + /* ... */ + #ifdef CONFIG_IPIPE + /* ... */ + omap_shared_itimer.irq = clkev.irq; + clockevent_gpt.ipipe_timer = &omap_shared_itimer; + /* ... */ + #endif /* CONFIG_IPIPE */ + + clockevents_register_device(&clockevent_gpt); + + /* ... */ + } +------------------------------------------------------------------------------- + +High resolution counter +----------------------- + +Since the co-kernel timer management is based on a timer running in +one-shot mode, and in order for applications to be able to measure +short time intervals, a high resolution counter is needed. + +Again, the hardware which can be used for such purposes depends on the +SoC. Reminiscent from the first Xenomai co-kernel using the I-pipe +which ran on the x86 processor architecture, this high resolution +counter is (abusively) called tsc (short for timestamp counter). + +As in the case of timer management, a C structure named +``__ipipe_tscinfo`` must be filled and registered to the I-pipe +core. You should also ensure that the symbol +"CONFIG_IPIPE_ARM_KUSER_TSC" gets selected. For instance, in +arch/arm/mach-socfpga/Kconfig, you may find: + +------------------------------------------------------------------------------- +menuconfig ARCH_SOCFPGA + bool "Altera SOCFPGA family" + depends on ARCH_MULTI_V7 + ... + select IPIPE_ARM_KUSER_TSC if IPIPE +------------------------------------------------------------------------------- + +.. _`A9-counter`: +The Cortex A9 case +~~~~~~~~~~~~~~~~~~ + +If the SoC you use is not based on the ARM Cortex A9 core, skip to the +:ref:`non-A9-counter `. In case of SoCs based on the ARM +Cortex A9 core, the hardware used as high resolution counter is +provided by the ARM core (aka "global timer"). Since this hardware is +not SoC-specific, the existing addition of ``__ipipe_tscinfo`` to +support the I-pipe (arch/arm/kernel/smp_twd.c) can be reused for all +A9-based SoCs. + +.. 
_`non-A9-counter`: +The general case +~~~~~~~~~~~~~~~~ + +The ``__ipipe_tscinfo`` C structure, defined in +arch/arm/include/asm/ipipe.h contains the following members: + +* ``unsigned int type`` + +The type, possible values are: + +** ``IPIPE_TSC_TYPE_FREERUNNING`` + +the tsc is based on a free-running counter + +** ``IPIPE_TSC_TYPE_DECREMENTER`` + +the tsc is based on a decrementer + +** ``IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN`` + +the tsc is based on a free-running counter, counting down + +** ``IPIPE_TSC_TYPE_FREERUNNING_TWICE`` + +the tsc is based on a free-running counter which needs to be read +twice (it sometimes returns wrong values, but never twice in a row) + +If the hardware you have at hand is not one of these, you need to + +** add a define for the type of hardware you have +(``IPIPE_TSC_TYPE_xxx``) + +** add an implementation (in assembly) for reading this counter and +extending it to a 64 bits value. See arch/arm/kernel/ipipe_tsc_asm.S and +arch/arm/kernel/ipipe_tsc.c for more details. Note that the assembly +implementation is limited in size to 96 bytes, or 24 x 32 bits +instructions. + +* ``unsigned int freq`` + +The counter frequency + +* ``unsigned long counter_vaddr`` + +The virtual address (in kernel-space) of the counter + +* ``unsigned long u.counter_paddr`` + +The physical address of the counter + +* ``unsigned long u.mask`` + +The mask of valid bits in the counter value. + +For instance 0xffffffff for a 32 bits counter, or 0xffff for a 16 bits +counter. Only a limited set of values are supported for each counter +type. If you need an unsupported value, arch/arm/kernel/ipipe_tsc.c +and arch/arm/kernel/ipipe_tsc_asm.S must be modified. + +Once a variable of type ``__ipipe_tscinfo`` is defined, it can be +registered to the I-pipe core with ``__ipipe_tsc_register()``. + +For instance, in arch/arm/mach-davinci/time.c, we have: + +------------------------------------------------------------------------------- +#ifdef CONFIG_IPIPE +static struct __ipipe_tscinfo tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING, + .u = { + { + .mask = 0xffffffff, + }, + }, +}; +#endif /* CONFIG_IPIPE */ + +void __init davinci_timer_init(void) +{ +#ifdef CONFIG_IPIPE + tsc_info.freq = davinci_clock_tick_rate; + tsc_info.counter_vaddr = (void *)(timers[TID_CLOCKSOURCE].base + + timers[TID_CLOCKSOURCE].tim_off); + tsc_info.u.counter_paddr = timers[TID_CLOCKSOURCE].pbase + + timers[TID_CLOCKSOURCE].tim_off; + __ipipe_tsc_register(&tsc_info); + /* ... */ +#endif /* CONFIG_IPIPE */ +} + +------------------------------------------------------------------------------- + +Since the tsc implementation extends the precision of the underlying +hardware counter to 64 bits, it also needs to be refreshed at a lower +period than the hardware counter wrap time. This refreshing is done by +``__ipipe_tsc_update()``, which is called periodically for a +registered tsc. + +If your hardware timer is based on a 16 bits counter, +``__ipipe_tsc_update()`` should be called in the ``ipipe_timer``'s +``set()`` handler as well, every time the hardware timer is +programmed. + +.. _`interrupt-controller`: +Interrupt controller +-------------------- + +The I-pipe core interacts with the SoC interrupt controller, for +implementing the deferred interrupt model. An interrupt is first +acknowledged and masked at the interrupt controller level, but handled +then unmasked by the regular Linux IRQ handler only when all +out-of-band activity is done. 
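+
+As a rough preview of what this means for an interrupt controller
+driver, here is a minimal, hypothetical sketch (the ``foo_*`` names
+are made up, and the ``irq_hold``/``irq_release`` handlers are assumed
+to take a ``struct irq_data`` pointer like the other ``irq_chip``
+handlers); the following sections and Documentation/ipipe.rst detail
+the actual requirements:
+
+-------------------------------------------------------------------------------
+ /* Hold the line while its handling by Linux is deferred. */
+ static void foo_irq_hold(struct irq_data *d)
+ {
+         foo_irq_mask(d);
+         foo_irq_eoi(d);
+ }
+
+ /* Release the line once the regular in-band handler has run. */
+ static void foo_irq_release(struct irq_data *d)
+ {
+         foo_irq_unmask(d);
+ }
+
+ static struct irq_chip foo_irq_chip = {
+         .name        = "FOO",
+         .irq_mask    = foo_irq_mask,
+         .irq_unmask  = foo_irq_unmask,
+         .irq_eoi     = foo_irq_eoi,
+         .irq_hold    = foo_irq_hold,
+         .irq_release = foo_irq_release,
+         .flags       = IRQCHIP_PIPELINE_SAFE,
+ };
+-------------------------------------------------------------------------------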
+
+Fortunately, as for timer management, interrupt controller specifics
+are embedded in the ``irq_chip`` C structure, and interactions with
+them are implemented in a generic way, so almost no modifications need
+to be done in the SoC-specific code. There are a few things you
+should pay attention to, though.
+
+As in the case of the timer and high resolution counter, the Cortex A9
+processor core contains an interrupt controller. If your SoC is based
+on the Cortex A9 core, you can skip to :ref:`config-multi-irq-handler
+`.
+
+Otherwise, you should locate the code for the interrupt controller
+management. Usually, the IRQ controller driver is located in
+drivers/irqchip, arch/arm/mach-X/irq.c or arch/arm/plat-Y/irq.c. As
+for the hardware timer, the irqchip should be registered through the
+device tree, so you should look in the SoC device tree file for a node
+with one of the "compatible" strings passed to the IRQCHIP_DECLARE
+macro in the kernel sources.
+
+IC handlers
+~~~~~~~~~~~
+
+The following handlers defined by the ``irq_chip`` C structure may be
+called from an out-of-band context immediately upon IRQ receipt, so
+they must not call any regular Linux services:
+
+* ``irq_ack``
+* ``irq_mask_ack``
+* ``irq_eoi``
+* ``irq_mask``
+* ``irq_unmask``
+
+In particular, regular Linux spinlocks used in those routines should
+be turned into a :ref:`hard-spinlocks `, making sure
+this does not entail unacceptable latencies from other places where
+such a lock is held.
+
+flow handlers
+~~~~~~~~~~~~~
+
+If the original flow handler for the IRQ is ``handle_fasteoi_irq()``,
+two I-pipe specific IC handlers should be defined by the ``irq_chip``
+descriptor:
+
+* ``irq_hold`` should mask then EOI the interrupt line, i.e. same as
+  calling ``irq_mask`` and ``irq_eoi`` subsequently.
+
+* ``irq_release`` should unmask the interrupt line, i.e. same as
+  calling ``irq_unmask``.
+
+If the flow handler is ``handle_edge_irq()`` and the system locks up
+when the first interrupt is received, try switching the flow handler
+to ``handle_level_irq()`` instead.
+
+.. _`config-multi-irq-handler`:
+CONFIG_MULTI_IRQ_HANDLER
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the SoC you use enables this option, look into the board file
+between the MACHINE_START and MACHINE_END declarations for the
+``handle_irq`` member. The original implementation of this routine
+should be in the interrupt controller file, exhibiting a loop which
+decodes interrupt numbers from the hardware registers, eventually
+calling ``handle_IRQ`` for each decoded IRQ.
+
+Once again, you must make sure that no regular Linux routine is called
+by this low-level IRQ decoding handler, and invoke
+``ipipe_handle_multi_irq`` instead of ``handle_IRQ``.
+
+Likewise, on SMP systems, calls to ``handle_IPI`` should be replaced
+by a call to ``ipipe_handle_multi_ipi``.
+
+multi-processor systems
+~~~~~~~~~~~~~~~~~~~~~~~
+
+On multi-processor systems, IPIs are mapped to virtual pipelined IRQs
+(aka *virqs*), and the SoC support needs no addition.
+
+.. _GPIOs:
+GPIOs
+~~~~~
+
+Most SoCs have GPIOs. In the context of a co-kernel, they are
+interesting for two reasons:
+
+* they may be used by real-time drivers as input or output for
+communicating with external peripherals.
+* they may be used as interrupt sources.
+
+GPIOs in real-time drivers
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As for hardware timers and interrupt controllers, the specifics of a
+GPIO controller are embedded in a structure, i.e. ``gpio_chip``.
+The definition for the SoC is usually found in one of the files:
+drivers/gpio/gpio-X.c, arch/arm/mach-Y/gpio.c, arch/arm/plat-Z/gpio.c.
+
+These handlers are accessible using the regular *gpiolib*
+infrastructure.
+
+For instance, the ``gpio_chip`` descriptor defines a ``get`` handler,
+which is indirectly called from ``gpio_get_value`` for reading out the
+current level for any given pin.
+
+Here again, you must make sure that no regular Linux routine is called
+by GPIO handlers. If this is the case:
+
+* if the implementation of these handlers needs to communicate with an
+I2C or SPI chip, the code as it is needs significant changes to be made
+available to real-time drivers, starting with rewriting the driver for
+the I2C or SPI controller as a driver running in the real-time domain;
+
+* if the implementation of these handlers simply uses a spinlock, the
+spinlock may be turned into a :ref:`hard-spinlocks `
+(pay attention, however, that no other Linux service is called, and
+that no action which may take an unbounded time is carried out while
+holding the spinlock).
+
+GPIOs as interrupt sources
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Most SoCs have so many GPIOs that each one cannot have a separate
+line at the interrupt controller level, so they are multiplexed. What
+happens then is that there is a single line for a whole GPIO bank; the
+interrupt handler for this IRQ line should read a GPIO controller
+register to find out which of the GPIO interrupts are pending, then
+invoke the handler for each of them. The mechanism used by the Linux
+kernel to handle this situation is called "chained interrupts"; you
+can find out whether the SoC you use is in this case by checking
+whether it calls the function "irq_set_chained_handler". It is usually
+found in drivers/gpio/gpio-X.c, arch/arm/mach-Y/gpio.c,
+arch/arm/plat-Z/gpio.c, arch/arm/mach-X/irq.c, or
+arch/arm/plat-Y/irq.c.
+
+With the I-pipe core, the handler registered with
+"irq_set_chained_handler" will be called in real-time context, so it
+should not use any Linux service which cannot be used from real-time
+context; in particular, calls to "generic_handle_irq" should be
+replaced with calls to "ipipe_handle_demuxed_irq".
+
+When GPIOs are used as interrupt sources, a "struct irq_chip" is
+defined, allowing the kernel to see the GPIO controller as an
+interrupt controller, so most of what is said in the
+:ref:`interrupt-controller <"Interrupt controller" section>` also
+applies to the GPIO controller. Most of the time, though, the "flow
+handler" for these interrupts is "handle_simple_irq", and nothing
+needs to be done.
+
+.. _`hard-spinlocks`:
+I-pipe spinlocks
+----------------
+
+Occasionally, some spinlocks need to be shared between the real-time
+and Linux domains. We have talked about this in the
+:ref:`hardware-timer <"Hardware timer">`,
+:ref:`interrupt-controller <"Interrupt controller">` and
+:ref:`GPIOs <"GPIOs">` sections.
+
+However, beware, this is not a panacea: calling a regular kernel
+routine while holding such a spinlock may end up in a train wreck for
+the system, or at the very least cause the response time to skyrocket
+for the co-kernel applications.
+
+The I-pipe provides macros to turn regular or raw kernel spinlock
+definitions into I-pipe hard spinlocks, and others to declare the
+latter.
+
+[cols=",",]
+|==============================================================
+|Linux code |Should be replaced with
+|``extern raw_spinlock_t foo`` |``IPIPE_DECLARE_RAW_SPINLOCK(foo)``
+|``DEFINE_RAW_SPINLOCK(foo)`` |``IPIPE_DEFINE_RAW_SPINLOCK(foo)``
+|``extern spinlock_t foo`` |``IPIPE_DECLARE_SPINLOCK(foo)``
+|``DEFINE_SPINLOCK(foo)`` |``IPIPE_DEFINE_SPINLOCK(foo)``
+|==============================================================
+
+For instance, in arch/arm/mm/context.c
+
+-------------------------------------------------------------------------------
+ static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
+-------------------------------------------------------------------------------
+
+is replaced with:
+
+-------------------------------------------------------------------------------
+ static IPIPE_DEFINE_RAW_SPINLOCK(cpu_asid_lock);
+-------------------------------------------------------------------------------
+
+In addition to the usual ``spin_lock()``, ``spin_unlock()``,
+``spin_lock_irqsave()`` and ``spin_unlock_irqrestore()`` routines, the
+I-pipe core provides ``spin_lock_irqsave_cond()`` and
+``spin_unlock_irqrestore_cond()``.
+
+These services are replaced with their ``spin_lock_irqsave()`` /
+``spin_unlock_irqrestore()`` counterparts when compiling the Linux
+kernel with the I-pipe core enabled, and replaced with ``spin_lock()``
+/ ``spin_unlock()`` otherwise.
+
+This is useful for protecting a critical section of the regular kernel
+code against preemption from out-of-band IRQ handlers.
+
+[[troubleshooting]]
+Troubleshooting
+---------------
+
+When you have modified the I-pipe core for supporting your board, try:
+
+* boot the kernel for your board compiled without CONFIG_IPIPE
+enabled
+* boot the kernel for your board compiled with CONFIG_IPIPE enabled but
+without the co-kernel (e.g. disable CONFIG_XENOMAI).
+* boot the kernel for your board compiled with CONFIG_IPIPE and
+the co-kernel (e.g. enable CONFIG_XENOMAI).
+* run the latency test
+
+If any of these steps does not work correctly, do not go further; try
+and debug that step first.
+
+Common issues include:
+
+[[the-kernel-stops-after-the-message-uncompressing-linux]]
+The kernel stops after the message "Uncompressing Linux... done, booting the kernel."
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The screen remains blank, nothing happens. It means that the kernel
+oopsed or locked up early during the boot process. In order to
+understand what happens:
+
+* enable CONFIG_DEBUG_LL and CONFIG_EARLY_PRINTK in the kernel
+configuration, recompile the kernel.
+
+.. CAUTION: make sure to configure the debug UART properly, otherwise
+   this may crash the kernel in the early boot stage.
+
+* add "earlyprintk" to the kernel parameters
+
+The kernel messages should then be displayed immediately, allowing you
+to figure out at what point in the boot process the kernel crashes or
+locks up.
+
+[[the-kernel-stops-after-the-message-calibrating-delay-loop]]
+The kernel stops after the message "Calibrating delay loop..."
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It means that the timer interrupt is not ticking and that the delay
+calibration routine is running an infinite loop at ``while (ticks ==
+jiffies)`` in the function ``calibrate_delay()``, file
+init/calibrate.c.
+
+This probably means that changes you made to the hardware timer support
+or interrupt controller code broke something.
+To help debugging this situation, you can print any hardware timer or
+interrupt controller register in the ``while (ticks == jiffies)``
+loop.
+
+[[timer-issues]]
+Timer issues
+~~~~~~~~~~~~
+
+Most issues when porting the I-pipe core to a new ARM SoC are timer
+issues; the timer is the hardest part to get right.
+
+When you boot the kernel without CONFIG_IPIPE, the timer code should
+be almost unmodified, except maybe for the timer acknowledgement. If
+at this point the kernel does not work, it probably means that you got
+the timer acknowledgement wrong.
+
+When you boot the kernel with CONFIG_IPIPE, but without enabling the
+co-kernel (e.g. CONFIG_XENOMAI), the hardware timer remains controlled
+by the Linux kernel. If at this point the kernel does not work, it
+probably means that something other than the timer is wrong, most
+likely in the interrupt controller support code.
+
+When you boot the dual kernel system, the co-kernel usually takes
+control over the hardware timer, invoking the ``struct ipipe_timer``
+:ref:`non-A9-timer ` handlers for processing a regular Linux timer
+tick. If at this point the kernel does not work, it probably means
+that some of those handlers are wrong.
+
+Finally, only when running some co-kernel application like a latency
+measurement test is the timer eventually used to activate co-kernel
+threads in the real-time domain. You should check that the latency
+test prints a message every second; if it does not, it probably means
+that the timer frequency is wrong, while still being consistent with
+the tsc frequency.
+
+A *drift* in the minimum and maximum latency values indicates a
+mismatch between the timer and the tsc frequency. Unacceptably large
+latency values are likely caused by a section of code running for too
+long with interrupts masked, or by some issue caused by the idle loop.
diff -ruN linux-org/Documentation/ipipe.rst linux/Documentation/ipipe.rst
--- linux-org/Documentation/ipipe.rst 1970-01-01 01:00:00.000000000 +0100
+++ linux/Documentation/ipipe.rst 2022-03-25 10:15:23.353003010 +0100
@@ -0,0 +1,919 @@
+.. include::
+
+===================================
+The Interrupt Pipeline (aka I-pipe)
+===================================
+
+:Copyright: |copy| 2018: Philippe Gerum
+
+Purpose
+=======
+
+Using Linux as a host for lightweight software cores specialized in
+delivering very short and bounded response times has been a popular
+way of supporting real-time applications in the embedded space over
+the years.
+
+This design - known as the *dual kernel* approach - introduces a small
+real-time infrastructure which schedules time-critical activities
+independently from the main kernel. Application threads co-managed by
+this infrastructure still benefit from the ancillary kernel services
+such as virtual memory management, and can also leverage the rich GPOS
+feature set Linux provides such as networking, data storage or GUIs.
+
+Although the real-time infrastructure has to present specific driver
+stack and API implementations to applications, there are nonetheless
+significant upsides to keeping the real-time core separate from the
+GPOS infrastructure:
+
+- because the two kernels are independent, real-time activities are
+  not serialized with GPOS operations internally, removing potential
+  delays which might be induced by the non time-critical
+  work.
Likewise, there is no requirement for keeping the GPOS + operations fine-grained and highly preemptible at any time, which + would otherwise induce noticeable overhead on low-end hardware, due + to the requirement for pervasive task priority inheritance and IRQ + threading. + +- the functional isolation of the real-time infrastructure from the + rest of the kernel code restricts common bug hunting to the scope of + the smaller kernel, excluding most interactions with the very large + GPOS kernel base. + +- with a dedicated infrastructure providing a specific, well-defined + set of real-time services, applications can unambiguously figure out + which API calls are available for supporting time-critical work, + excluding all the rest as being potentially non-deterministic with + respect to response time. + +To support such a *dual kernel system*, we need the kernel to exhibit +a high-priority execution context, for running out-of-band real-time +duties concurrently to the regular operations. + +.. NOTE:: The I-pipe only introduces the basic mechanisms for hosting +such a real-time core, enabling the common programming model for its +applications in user-space. It does *not* implement the real-time core +per se, which should be provided by a separate kernel component. + +The issue of interrupt response time +==================================== + +The real-time core has to act upon device interrupts with no delay, +regardless of the regular kernel operations which may be ongoing when +the interrupt is received by the CPU. + +However, to protect from deadlocks and maintain data integrity, Linux +normally hard disables interrupts around any critical section of code +which must not be preempted by interrupt handlers on the same CPU, +enforcing a strictly serialized execution among those contexts. + +The unpredictable delay this may cause before external events can be +handled is a major roadblock for kernel components requiring +predictable and very short response times to external events, in the +range of a few microseconds. + +Therefore, there is a basic requirement for prioritizing interrupt +masking and delivery between the real-time core and GPOS operations, +while maintaining consistent internal serialization for the kernel. + +To address this issue, the I-pipe implements a mechanism called +*interrupt pipelining* turns all device IRQs into NMIs, only to run +NMI-safe interrupt handlers from the perspective of the regular kernel +activities. + +Two-stage IRQ pipeline +====================== + +.. _pipeline +Interrupt pipelining is a lightweight approach based on the +introduction of a separate, high-priority execution stage for running +out-of-band interrupt handlers immediately upon IRQ receipt, which +cannot be delayed by the in-band, regular kernel work even if the +latter serializes the execution by - seemingly - disabling interrupts. + +IRQs which have no handlers in the high priority stage may be deferred +on the receiving CPU until the out-of-band activity has quiesced on +that CPU. Eventually, the preempted in-band code can resume normally, +which may involve handling the deferred interrupts. + +In other words, interrupts are flowing down from the out-of-band to +the in-band interrupt stages, which form a two-stage pipeline for +prioritizing interrupt delivery. 
+ +The runtime context of the out-of-band interrupt handlers is known as +the *head stage* of the pipeline, as opposed to the in-band kernel +activities sitting on the *root stage*:: + + Out-of-band In-band + IRQ handlers() IRQ handlers() + __________ _______________________ ______ + . / / . . / / . + . / / . . / / . + . / / . . / / . + ___/ /______________________/ / . + [IRQ] -----> _______________________________/ . + . . . . + . Head . . Root . + . Stage . . Stage . + _____________________________________________ + + +A software core may base its own activities on the head stage, +interposing on specific IRQ events, for delivering real-time +capabilities to a particular set of applications. Meanwhile, the +regular kernel operations keep going over the root stage unaffected, +only delayed by short preemption times for running the out-of-band +work. + +.. NOTE:: Interrupt pipelining is a partial implementation of [#f2]_, + in which an interrupt *stage* is a limited form of an + operating system *domain*. + +Virtual interrupt flag +---------------------- + +.. _flag: +As hinted earlier, predictable response time of out-of-band handlers +to IRQ receipts requires the in-band kernel work not to be allowed to +delay them by masking interrupts in the CPU. + +However, critical sections delimited this way by the in-band code must +still be enforced for the *root stage*, so that system integrity is +not at risk. This means that although out-of-band IRQ handlers may run +at any time while the *head stage* is accepting interrupts, in-band +IRQ handlers should be allowed to run only when the root stage is +accepting interrupts too. + +So we need to decouple the interrupt masking and delivery logic which +applies to the head stage from the one in effect on the root stage, by +implementing a dual interrupt control mechanism. + +To this end, a software logic managing a virtual interrupt flag (aka +*IPIPE_STALL_FLAG*) is introduced by the interrupt pipeline between +the hardware and the generic IRQ management layer. This logic can mask +IRQs from the perspective of the regular kernel work when +:c:func:`local_irq_save`, :c:func:`local_irq_disable` or any +lock-controlled masking operations like :c:func:`spin_lock_irqsave` is +called, while still accepting IRQs from the CPU for immediate delivery +to out-of-band handlers. + +The head stage protects from interrupts by disabling them in the CPU's +status register, while the root stage disables interrupts only +virtually. A stage for which interrupts are disabled is said to be +*stalled*. Conversely, *unstalling* a stage means re-enabling +interrupts for it. + +Obviously, stalling the head stage implicitly means disabling +further IRQ receipts for the root stage too. + +Interrupt deferral for the *root stage* +--------------------------------------- + +.. _deferral: +.. _deferred: +When the root stage is stalled by setting the virtual interrupt flag, +the occurrence of any incoming IRQ which was not delivered to the +*head stage* is recorded into a per-CPU log, postponing its actual +delivery to the root stage. + +The delivery of the interrupt event to the corresponding in-band IRQ +handler is deferred until the in-band kernel code clears the virtual +interrupt flag by calling :c:func:`local_irq_enable` or any of its +variants, which unstalls the root stage. When this happens, the +interrupt state is resynchronized by playing the log, firing the +in-band handlers for which an IRQ was set pending. 
+ +:: + /* Both stages unstalled on entry */ + local_irq_save(flags); + + (pipeline logs IRQx event) + ... + local_irq_restore(flags); + (pipeline plays IRQx event) + handle_IRQx_interrupt(); + +If the root stage is unstalled at the time of the IRQ receipt, the +in-band handler is immediately invoked, just like with the +non-pipelined IRQ model. + +.. NOTE:: The principle of deferring interrupt delivery based on a + software flag coupled to an event log has been originally + described as "Optimistic interrupt protection" in [#f1]_. + +Device interrupts virtually turned into NMIs +-------------------------------------------- + +From the standpoint of the in-band kernel code (i.e. the one running +over the *root* interrupt stage) , the interrupt pipelining logic +virtually turns all device IRQs into NMIs, for running out-of-band +handlers. + +.. _re-entry: +For this reason, out-of-band code may generally **NOT** re-enter +in-band code, for preventing creepy situations like this one:: + + /* in-band context */ + spin_lock_irqsave(&lock, flags); + + handle_oob_event(); + /* attempted re-entry to in-band from out-of-band. */ + in_band_routine(); + spin_lock_irqsave(&lock, flags); + + ... + ... + ... + ... + spin_unlock irqrestore(&lock, flags); + +Even in absence of any attempt to get a spinlock recursively, the +outer in-band code in the example above is entitled to assume that no +access race can occur on the current CPU while interrupts are +masked. Re-entering in-band code from an out-of-band handler would +invalidate this assumption. + +In rare cases, we may need to fix up the in-band kernel routines in +order to allow out-of-band handlers to call them. Typically, atomic_ +helpers are such routines, which serialize in-band and out-of-band +callers. + +Virtual/Synthetic interrupt vectors +----------------------------------- + +.. _synthetic: +.. _virtual: +The pipeline introduces an additional type of interrupts, which are +purely software-originated, with no hardware involvement. These IRQs +can be triggered by any kernel code. So-called virtual IRQs are +inherently per-CPU events. + +Because the common pipeline flow_ applies to virtual interrupts, it +is possible to attach them to out-of-band and/or in-band handlers, +just like device interrupts. + +.. NOTE:: virtual interrupts and regular softirqs differ in essence: + the latter only exist in the in-band context, and therefore + cannot trigger out-of-band activities. + +Virtual interrupt vectors are allocated by a call to +:c:func:`ipipe_alloc_virq`, and conversely released with +:c:func:`ipipe_free_virq`. + +For instance, a virtual interrupt can be used for triggering an +in-band activity on the root stage from the head stage as follows:: + + #include + + static void virq_handler(unsigned int virq, void *cookie) + { + do_in_band_work(); + } + + void install_virq(void) + { + unsigned int virq; + ... + virq = ipipe_alloc_virq(); + ... + ipipe_request_irq(ipipe_root_domain, virq, virq_handler, + handler_arg, NULL); + } + +An out-of-band handler can schedule the execution of +:c:func:`virq_handler` like this:: + + ipipe_post_irq_root(virq); + +Conversely, a virtual interrupt can be handled from the out-of-band +context:: + + static void virq_oob_handler(unsigned int virq, void *cookie) + { + do_oob_work(); + } + + void install_virq(void) + { + unsigned int virq; + ... + virq = ipipe_alloc_virq(); + ... 
+ ipipe_request_irq(ipipe_head_domain, virq, virq_oob_handler, + handler_arg, NULL); + } + +Any in-band code can trigger the immediate execution of +:c:func:`virq_oob_handler` on the head stage as follows:: + + ipipe_post_irq_head(virq); + +Pipelined interrupt flow +------------------------ + +.. _flow: +When interrupt pipelining is enabled, IRQs are first delivered to the +pipeline entry point via a call to the generic +:c:func:`__ipipe_dispatch_irq` routine. Before this happens, the event +has been propagated through the arch-specific code for handling an IRQ:: + + asm_irq_entry + -> irqchip_handle_irq() + -> ipipe_handle_domain_irq() + -> __ipipe_grab_irq() + -> __ipipe_dispatch_irq() + -> irq_flow_handler() + + +Contrary to the non-pipelined model, the generic IRQ flow handler does +*not* call the in-band interrupt handler immediately, but only runs +the irqchip-specific handler for acknowledging the incoming IRQ event +in the hardware. + +.. _Holding interrupt lines: +If the interrupt is either of the *level-triggered*, *fasteoi* or +*percpu* type, the irqchip is given a chance to hold the interrupt +line, typically by masking it, until either of the out-of-band or +in-band handler have run. This addresses the following scenario, which +happens for a similar reason while an IRQ thread waits for being +scheduled in, requiring the same kind of provision:: + + /* root stage stalled on entry */ + asm_irq_entry + ... + -> __ipipe_dispatch_irq() + ... + + asm_irq_exit + /* + * CPU allowed to accept interrupts again with IRQ cause not + * acknowledged in device yet => **IRQ storm**. + */ + asm_irq_entry + ... + asm_irq_exit + asm_irq_entry + ... + asm_irq_exit + +IRQ delivery logic +------------------ + +If an out-of-band handler exists for the interrupt received, +:c:func:`__ipipe_dispatch_irq` invokes it immediately, after switching +the execution context to the head stage if not current yet. + +Otherwise, if the execution context is currently over the root stage +and unstalled, the pipeline core delivers it immediately to the +in-band handler. + +In all other cases, the interrupt is only set pending into the per-CPU +log, then the interrupt frame is left. + +Alternate scheduling +==================== + +The I-pipe promotes the idea that a *dual kernel* system should keep +the functional overlap between the kernel and the real-time core +minimal. To this end, a real-time thread should be merely seen as a +regular task with additional scheduling capabilities guaranteeing very +low response times. + +To support such idea, the I-pipe enables kthreads and regular user +tasks to run alternatively in the out-of-band execution context +introduced by the interrupt pipeline_ (aka *head* stage), or the +common in-band kernel context for GPOS operations (aka *root* stage). + +As a result, real-time core applications in user-space benefit from +the common Linux programming model - including virtual memory +protection -, and still have access to the regular Linux services for +carrying out non time-critical work. + +Task migration to the head stage +-------------------------------- + +Low latency response time to events can be achieved when Linux tasks +wait for them from the out-of-band execution context. The real-time +core is responsible for switching a task to such a context as part of +its task management rules; the I-pipe facilitates this migration with +dedicated services. 
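+
+As a minimal usage sketch, ahead of the step-by-step description
+below (this assumes :c:func:`__ipipe_migrate_head` takes no argument
+and returns zero on success; :c:func:`rt_core_run_oob` is a purely
+hypothetical helper standing for whatever the real-time core does
+once it owns the thread)::
+
+    /* Called from a regular, in-band task context, preemption
+       enabled, interrupts on. */
+    int rt_core_switch_to_oob(void)
+    {
+            int ret;
+
+            /* Sleep in-band; resume over the head stage later. */
+            ret = __ipipe_migrate_head();
+            if (ret)
+                    return ret;
+
+            /* From this point, the caller runs out-of-band. */
+            return rt_core_run_oob();
+    }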
+ +The migration process of a task from the GPOS/in-band context to the +high-priority, out-of-band context is as follows: + +1. :c:func:`__ipipe_migrate_head` is invoked from the migrating task + context, with the same prerequisites than for calling + :c:func:`schedule` (preemption enabled, interrupts on). + +.. _`in-band sleep operation`: +2. the caller is put to interruptible sleep state (S). + +3. before resuming in-band operations, the next task picked by the + (regular kernel) scheduler on the same CPU for replacing the + migrating task fires :c:func:`ipipe_migration_hook` which the + real-time core should override (*__weak* binding). Before the call, + the head stage is stalled, interrupts are disabled in the CPU. The + root execution stage is still current though. + +4. the real-time core's implementation of + :c:func:`ipipe_migration_hook` is passed a pointer to the + task_struct descriptor of the migrating task. This routine is expected + to perform the necessary steps for taking control over the task on + behalf of the real-time core, re-scheduling its code appropriately + over the head stage. This typically involves resuming it from the + `out-of-band suspended state`_ applied during the converse migration + path. + +5. at some point later, when the migrated task is picked by the + real-time scheduler, it resumes execution on the head stage with + the register file previously saved by the kernel scheduler in + :c:func:`switch_to` at step 1. + +Task migration to the root stage +-------------------------------- + +Sometimes, a real-time thread may want to leave the out-of-band +context, continuing execution from the in-band context instead, so as +to: + +- run non time-critical (in-band) work involving regular system calls + handled by the kernel, + +- recover from CPU exceptions, such as handling major memory access + faults, for which there is no point in caring for response time, and + therefore makes no sense to duplicate in the real-time core anyway. + +.. NOTE: The discussion about exception_ handling covers the last + point in details. + +The migration process of a task from the high-priority, out-of-band +context to the GPOS/in-band context is as follows:: + +1. the real-time core schedules an in-band handler for execution which + should call :c:func:`wake_up_process` to unblock the migrating task + from the standpoint of the kernel scheduler. This is the + counterpart of the :ref:`in-band sleep operation ` from the converse migration path. A virtual_ IRQ can be + used for scheduling such event from the out-of-band context. + +.. _`out-of-band suspended state`: +2. the real-time core suspends execution of the current task from its + own standpoint. The real-time scheduler is assumed to be using the + common :c:func:`switch_to` routine for switching task contexts. + +3. at some point later, the out-of-band context is exited by the + current CPU when no more high-priority work is left, causing the + preempted in-band kernel code to resume execution on the root + stage. The handler scheduled at step 1 eventually runs, waking up + the migrating task from the standpoint of the kernel. + +4. the migrating task resumes from the tail scheduling code of the + real-time scheduler, where it suspended in step 2. Noticing the + migration, the real-time core eventually calls + :c:func:`__ipipe_reenter_root` for finalizing the transition of the + incoming task to the root stage. + +Binding to the real-time core +----------------------------- + +.. 
_binding: +The I-pipe facilitates fine-grained per-thread management from the +real-time core, as opposed to per-process. For this reason, the +real-time core should at least implement a mechanism for turning a +regular task into a real-time thread with extended capabilities, +binding it to the core. + +The real-time core should inform the kernel about its intent to +receive notifications about that task, by calling +:c:func::`ipipe_enable_notifier` when such task is current. + +For this reason, the binding operation is usually carried out by a +dedicated system call exposed by the real-time core, which a regular +task would invoke. + +.. NOTE:: Whether there should be distinct procedures for binding + processes *and* threads to the real-time core, or only a + thread binding procedure is up to the real-time core + implementation. + +Notifications +------------- + +Exception handling +~~~~~~~~~~~~~~~~~~ + +.. _exception +If a processor exception is raised while the CPU is busy running a +real-time thread in the out-of-band context (e.g. due to some invalid +memory access, bad instruction, FPU or alignment error etc), the task +may have to leave such context immediately if the fault handler is not +protected against out-of-band interrupts, and therefore cannot be +properly serialized with out-of-band code. + +The I-pipe notifies the real-time core about incoming exceptions early +from the low-level fault handlers, but only when some out-of-band code +was running when the exception was taken. The real-time core may then +take action, such as reconciling the current task's execution context +with the kernel's expectations before the task may traverse the +regular fault handling code. + +.. HINT:: Enabling debuggers to trace real-time thread involves + dealing with debug traps the former may poke into the + debuggee's code for breakpointing duties. + +The notification is issued by a call to :c:func:`__ipipe_notify_trap` +which in turn invokes the :c:func:`ipipe_trap_hook` routine the +real-time core should override for receiving those events (*__weak* +binding). Interrupts are **disabled** in the CPU when +:c:func:`ipipe_trap_hook` is called.:: + + /* out-of-band code running */ + *bad_pointer = 42; + [ACCESS EXCEPTION] + /* low-level fault handler in arch//mm */ + -> do_page_fault() + -> __ipipe_notify_trap(...) + /* real-time core */ + -> ipipe_trap_hook(...) + -> forced task migration to root stage + ... + -> handle_mm_fault() + +.. NOTE:: handling minor memory access faults only requiring quick PTE + fixups should not involve switching the current task to the + in-band context though. Instead, the fixup code should be + made atomic_ for serializing accesses from any context. + +System calls +~~~~~~~~~~~~ + +A real-time core interfaced with the kernel via the I-pipe may +introduce its own set of system calls. From the standpoint of the +kernel, this is a foreign set of calls, which can be distinguished +unambiguously from regular ones based on an arch-specific marker. + +.. HINT:: Syscall numbers from this set might have a different base, + and/or some high-order bit set which regular syscall numbers + would not have. + +If a task bound to the real-time core issues any system call, +regardless of which of the kernel or real-time core should handle it, +the latter must be given the opportunity to: + +- perform the service directly, possibly switching the caller to + out-of-band context first would the request require it. 
+ +- pass the request downward to the normal system call path on the root + stage, possibly switching the caller to in-band context if needed. + +If a regular task (i.e. *not* known from the real-time core [yet]) +issues any foreign system call, the real-time core is given a chance +to handle it. This way, a foreign system call which would initially +bind a regular task to the real-time core would be delivered to the +real-time core as expected (see binding_). + +The I-pipe intercepts system calls early in the kernel entry code, +delivering them to the proper handler according to the following +logic:: + + is_foreign(syscall_nr)? + Y: is_bound(task) + Y: -> ipipe_fastcall_hook() + N: -> ipipe_syscall_hook() + N: is_bound(task) + Y: -> ipipe_syscall_hook() + N: -> normal syscall handling + +:c:func:`ipipe_fastcall_hook` is the fast path for handling foreign +system calls from tasks already running in out-of-band context. + +:c:func:`ipipe_syscall_hook` is a slower path for handling requests +which might require the caller to switch to the out-of-band context +first before proceeding. + +Kernel events +~~~~~~~~~~~~~ + +The last set of notifications involves pure kernel events which the +real-time core may need to know about, as they may affect its own task +management. Except for IPIPE_KEVT_CLEANUP which is called for *any* +exiting user-space task, all other notifications are only issued for +tasks bound to the real-time core (which may involve kthreads). + +The notification is issued by a call to :c:func:`__ipipe_notify_kevent` +which in turn invokes the :c:func:`ipipe_kevent_hook` routine the +real-time core should override for receiving those events (*__weak* +binding). Interrupts are **enabled** in the CPU when +:c:func:`ipipe_kevent_hook` is called. + +The notification hook is given the event type code, and a single +pointer argument which relates to the event type. + +The following events are defined (include/linux/ipipe_domain.h): + +- IPIPE_KEVT_SCHEDULE(struct task_struct *next) + + sent in preparation of a context switch, right before the memory + context is switched to *next*. + +- IPIPE_KEVT_SIGWAKE(struct task_struct *target) + + sent when *target* is about to receive a signal. The real-time core + may decide to schedule a transition of the recipient to the root + stage in order to have it handle that signal asap, which is commonly + required for keeping the kernel sane. This notification is always + sent from the context of the issuer. + +- IPIPE_KEVT_SETAFFINITY(struct ipipe_migration_data *p) + + sent when p->task is about to move to CPU p->dest_cpu. + +- IPIPE_KEVT_EXIT(struct task_struct *current) + + sent from :c:func:`do_exit` before the current task has dropped the + files and mappings it owns. + +- IPIPE_KEVT_CLEANUP(struct mm_struct *mm) + + sent before *mm* is entirely dropped, before the mappings are + exited. Per-process resources which might be maintained by the + real-time core could be released there, as all threads have exited. + + ..NOTE:: IPIPE_KEVT_SETSCHED is deprecated, and should not be used. + +Prerequisites +============= + +The interrupt pipeline requires the following features to be available +from the target kernel: + +- Generic IRQ handling +- Clock event abstraction + +Implementation +============== + +The following kernel areas are involved in interrupt pipelining: + +- Generic IRQ core + + * IRQ flow handlers + + Generic flow handlers acknowledge the incoming IRQ event in the + hardware by calling the appropriate irqchip-specific + handler. 
However, the generic flow_ handlers do not immediately + invoke the in-band interrupt handlers, but leave this decision to + the pipeline core which calls them, according to the pipelined + delivery logic. + +- Arch-specific bits + + * CPU interrupt mask handling + + The architecture-specific code which manipulates the interrupt + flag in the CPU's state register + (i.e. arch//include/asm/irqflags.h) is split between real + and virtual interrupt control: + + + the *hard_local_irq* level helpers affect the hardware state in + the CPU. + + + the *arch_* level helpers affect the virtual interrupt flag_ + implemented by the pipeline core for controlling the root stage + protection against interrupts. + + This means that generic helpers from such as + :c:func:`local_irq_disable` and :c:func:`local_irq_enable` + actually refer to the virtual protection scheme when interrupts + are pipelined, implementing interrupt deferral_ for the protected + in-band code running over the root stage. + + * Assembly-level IRQ, exception paths + + Since interrupts are only virtually masked by the in-band code, + IRQs can still be taken by the CPU although they should not be + visible from the root stage when they happen in the following + situations: + + + when the virtual protection flag_ is raised, meaning the root + stage does not accept IRQs, in which case interrupt _deferral + happens. + + + when the CPU runs out-of-band code, regardless of the state of + the virtual protection flag. + + In both cases, the low-level assembly code handling incoming IRQs + takes a fast exit path unwinding the interrupt frame early, + instead of running the common in-band epilogue which checks for + task rescheduling opportunities and pending signals. + + Likewise, the low-level fault/exception handling code also takes a + fast exit path under the same circumstances. Typically, an + out-of-band handler causing a minor page fault should benefit from + a lightweight PTE fixup performed by the high-level fault handler, + but is not allowed to traverse the rescheduling logic upon return + from exception. + +- Scheduler core + + * CPUIDLE support + + The logic of the CPUIDLE framework has to account for those + specific issues the interrupt pipelining introduces: + + - the kernel might be idle in the sense that no in-band activity + is scheduled yet, and planning to shut down the timer device + suffering the C3STOP (mis)feature. However, at the same time, + some out-of-band code might wait for a tick event already + programmed in the timer hardware controlled by some out-of-band + code via the timer_ interposition mechanism. + + - switching the CPU to a power saving state may incur a + significant latency, particularly for waking it up before it can + handle an incoming IRQ, which is at odds with the purpose of + interrupt pipelining. + + Obviously, we don't want the CPUIDLE logic to turn off the + hardware timer when C3STOP is in effect for the timer device, + which would cause the pending out-of-band event to be + lost. + + Likewise, the wake up latency induced by entering a sleep state on + a particular hardware may not always be acceptable. + + Since the in-band kernel code does not know about the out-of-band + code plans by design, CPUIDLE calls :c:func:`ipipe_cpuidle_control` + to figure out whether the out-of-band system is fine with entering + the idle state as well. This routine should be overriden by the + out-of-band code for receiving such notification (*__weak* + binding). 
+ + If this hook returns a boolean *true* value, CPUIDLE proceeds as + normally. Otherwise, the CPU is simply denied from entering the + idle state, leaving the timer hardware enabled. + + * Kernel preemption control (PREEMPT) + + :c:func:`__preempt_schedule_irq` reconciles the virtual interrupt + state - which has not been touched by the assembly level code upon + kernel entry - with basic assumptions made by the scheduler core, + such as entering with interrupts disabled. It should be called by + the arch-specific assembly code in replacement of + :c:func:`preempt_schedule_irq`, from the call site dealing with + kernel preemption upon return from IRQ or system call. + +- Timer management + + * Timer interposition + +.. _timer: + The timer interposition mechanism is designed for handing over + control of the hardware tick device in use by the kernel to an + out-of-band timing logic. Typically, a real-time co-kernel would + make good use of this feature, for grabbing control over the timer + hardware. + + Once some out-of-band logic has grabbed control over the timer + device by calling :c:func:`ipipe_select_timers`, it can install + its own out-of-band handlers using :c:func:`ipipe_timer_start`. + From that point, it must carry out the timing requests from the + in-band timer core (e.g. hrtimers) in addition to its own timing + duties. + + In other words, once the interposition is set up, the + functionality of the tick device is shared between the in-band and + out-of-band contexts, with only the latter actually programming + the hardware. + + This mechanism is based on the clock event abstraction (`struct + clock_event_device`). Clock event devices which may be controlled + by this way need their drivers to be specifically adapted for such + use: + + + the interrupt handler receiving tick IRQs must be check with + :c:func:`clockevent_ipipe_stolen` whether they actually control + the hardware. A non-zero return from this routine means that it + does not, and therefore should skip the timer acknowledge + code, which would have run earlier in that case. + +- Generic locking & atomic + + * Generic atomic ops + +.. _atomic: + The effect of virtualizing interrupt protection must be reversed + for atomic helpers in and + , so that no interrupt can preempt + their execution, regardless of the stage their caller live + on. + + This is required to keep those helpers usable on data which + might be accessed concurrently from both stages. + + The usual way to revert such virtualization consists of delimiting + the protected section with :c:func:`hard_local_irq_save`, + :c:func:`hard_local_irq_restore` calls, in replacement for + :c:func:`local_irq_save`, :c:func:`local_irq_restore` + respectively. + + * Hard spinlocks + + The pipeline core introduces one more spinlock type: + + + *hard* spinlocks manipulate the CPU interrupt mask, and don't + affect the kernel preemption state in locking/unlocking + operations. + + This type of spinlock is useful for implementing a critical + section to serialize concurrent accesses from both in-band and + out-of-band contexts, i.e. from root and head stages. Obviously, + sleeping into a critical section protected by a hard spinlock + would be a very bad idea. + + In other words, hard spinlocks are not subject to virtual + interrupt masking, therefore can be used to serialize with + out-of-band activities, including from the in-band kernel + code. At any rate, those sections ought to be quite short, for + keeping latency low. 
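+
+      A short usage sketch (the ``foo_*`` names are hypothetical):
+      such a lock is typically defined with the
+      ``IPIPE_DEFINE_SPINLOCK()`` helper, and the usual
+      ``spin_lock_irqsave()`` / ``spin_unlock_irqrestore()`` calls
+      then disable interrupts in the CPU for the duration of the
+      section::
+
+        static IPIPE_DEFINE_SPINLOCK(foo_lock);
+        static u32 foo_shared_state;
+
+        /* May be called from both in-band and out-of-band contexts. */
+        void foo_update_state(u32 val)
+        {
+                unsigned long flags;
+
+                spin_lock_irqsave(&foo_lock, flags);
+                foo_shared_state = val; /* keep such sections short */
+                spin_unlock_irqrestore(&foo_lock, flags);
+        }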
+ +- Drivers + + * IRQ chip drivers + + .. _irqchip: + irqchip drivers need to be specifically adapted for supporting the + pipelined interrupt model. The irqchip descriptor gains additional + handlers: + + + irq_chip.irq_hold is an optional handler called by the pipeline + core upon events from *level-triggered*, *fasteoi* and *percpu* + types. See Holding_ interrupt lines. + + When specified in the descriptor, irq_chip.irq_hold should + perform as follows, depending on the hardware acknowledge logic: + + + level -> mask[+ack] + + percpu -> mask[+ack][+eoi] + + fasteoi -> mask+eoi + + .. CAUTION:: proper acknowledge and/or EOI is important when + holding a line, as those operations may also + decrease the current interrupt priority level for + the CPU, allowing same or lower priority + out-of-band interrupts to be taken while the + initial IRQ might be deferred_ for the root stage. + + + irq_chip.irq_release is the converse operation to + irq_chip.irq_hold, releasing an interrupt line from the held + state. + + The :c:func:`ipipe_end_irq` routine invokes the available + handler for releasing the interrupt line. The pipeline core + calls :c:func:`irq_release` automatically for each IRQ which has + been accepted by an in-band handler (`IRQ_HANDLED` status). This + routine should be called explicitly by out-of-band handlers + before returning to their caller. + + `IRQCHIP_PIPELINE_SAFE` must be added to `struct irqchip::flags` + member of a pipeline-aware irqchip driver. + + .. NOTE:: :c:func:`irq_set_chip` will complain loudly with a + kernel warning whenever the irqchip descriptor passed + does not bear the `IRQCHIP_PIPELINE_SAFE` flag and + CONFIG_IPIPE is enabled. + +- Misc + + * :c:func:`printk` + + :c:func:`printk` may be called by out-of-band code safely, without + encurring extra latency. The output is delayed until the in-band + code resumes, and the console driver(s) can handle it. + + * Tracing core + + Tracepoints can be traversed by out-of-band code safely. Dynamic + tracing is available to a kernel running the pipelined interrupt + model too. + +Terminology +=========== + +.. _terminology: +====================== ======================================================= + Term Definition +====================== ======================================================= +Head stage high-priority execution context trigged by out-of-band IRQs +Root stage regular kernel context performing GPOS work +Out-of-band code code running over the head stage +In-band code code running over the root stage +Scheduler the regular, Linux kernel scheduler +Real-time scheduler the out-of-band task scheduling logic implemented on top of the I-pipe + +Resources +========= + +.. [#f1] Stodolsky, Chen & Bershad; "Fast Interrupt Priority Management in Operating System Kernels" + https://www.usenix.org/legacy/publications/library/proceedings/micro93/full_papers/stodolsky.txt +.. [#f2] Yaghmour, Karim; "ADEOS - Adaptive Domain Environment for Operating Systems" + https://www.opersys.com/ftp/pub/Adeos/adeos.pdf diff -ruN linux-org/drivers/base/core.c linux/drivers/base/core.c --- linux-org/drivers/base/core.c 2022-03-25 09:55:37.617469589 +0100 +++ linux/drivers/base/core.c 2022-03-25 10:15:23.353003010 +0100 @@ -2918,6 +2918,17 @@ static void __dev_printk(const char *level, const struct device *dev, struct va_format *vaf) { +#ifdef CONFIG_IPIPE + /* + * Console logging only if hard locked, or over the head + * stage. 
+ */ + if (hard_irqs_disabled() || !ipipe_root_p) { + __ipipe_log_printk(vaf->fmt, *vaf->va); + return; + } +#endif + if (dev) dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", dev_driver_string(dev), dev_name(dev), vaf); diff -ruN linux-org/drivers/clocksource/arm_arch_timer.c linux/drivers/clocksource/arm_arch_timer.c --- linux-org/drivers/clocksource/arm_arch_timer.c 2022-03-25 09:55:37.777468986 +0100 +++ linux/drivers/clocksource/arm_arch_timer.c 2022-03-25 10:15:23.345003040 +0100 @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include #include @@ -549,8 +551,7 @@ #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ -static __always_inline irqreturn_t timer_handler(const int access, - struct clock_event_device *evt) +static int arch_timer_ack(const int access, struct clock_event_device *evt) { unsigned long ctrl; @@ -558,6 +559,52 @@ if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { ctrl |= ARCH_TIMER_CTRL_IT_MASK; arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); + return 1; + } + return 0; +} + +#ifdef CONFIG_IPIPE +static DEFINE_PER_CPU(struct ipipe_timer, arch_itimer); +static struct __ipipe_tscinfo tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING_ARCH, + .u = { + { + .mask = 0xffffffffffffffff, + }, + }, +}; + +static void arch_itimer_ack_phys(void) +{ + struct clock_event_device *evt = this_cpu_ptr(arch_timer_evt); + arch_timer_ack(ARCH_TIMER_PHYS_ACCESS, evt); +} + +static void arch_itimer_ack_virt(void) +{ + struct clock_event_device *evt = this_cpu_ptr(arch_timer_evt); + arch_timer_ack(ARCH_TIMER_VIRT_ACCESS, evt); +} +#endif /* CONFIG_IPIPE */ + +static inline irqreturn_t timer_handler(int irq, const int access, + struct clock_event_device *evt) +{ + if (clockevent_ipipe_stolen(evt)) + goto stolen; + + if (arch_timer_ack(access, evt)) { +#ifdef CONFIG_IPIPE + struct ipipe_timer *itimer = raw_cpu_ptr(&arch_itimer); + if (itimer->irq != irq) + itimer->irq = irq; +#endif /* CONFIG_IPIPE */ + stolen: + /* + * This is a 64bit clock source, no need for TSC + * update. 
+ */ evt->event_handler(evt); return IRQ_HANDLED; } @@ -569,28 +616,28 @@ { struct clock_event_device *evt = dev_id; - return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt); + return timer_handler(irq, ARCH_TIMER_VIRT_ACCESS, evt); } static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) { struct clock_event_device *evt = dev_id; - return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); + return timer_handler(irq, ARCH_TIMER_PHYS_ACCESS, evt); } static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id) { struct clock_event_device *evt = dev_id; - return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt); + return timer_handler(irq, ARCH_TIMER_MEM_PHYS_ACCESS, evt); } static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id) { struct clock_event_device *evt = dev_id; - return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt); + return timer_handler(irq, ARCH_TIMER_MEM_VIRT_ACCESS, evt); } static __always_inline int timer_shutdown(const int access, @@ -704,6 +751,17 @@ } arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); +#ifdef CONFIG_IPIPE + clk->ipipe_timer = raw_cpu_ptr(&arch_itimer); + if (arch_timer_mem_use_virtual) { + clk->ipipe_timer->irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; + clk->ipipe_timer->ack = arch_itimer_ack_virt; + } else { + clk->ipipe_timer->irq = arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI]; + clk->ipipe_timer->ack = arch_itimer_ack_phys; + } + clk->ipipe_timer->freq = arch_timer_rate; +#endif } else { clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; @@ -777,6 +835,9 @@ else cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; +#ifdef CONFIG_IPIPE + cntkctl |= ARCH_TIMER_USR_PCT_ACCESS_EN; +#endif arch_timer_set_cntkctl(cntkctl); } @@ -901,6 +962,10 @@ arch_timer_read_counter = arch_counter_get_cntvct_mem; } +#ifdef CONFIG_IPIPE + tsc_info.freq = arch_timer_rate; + __ipipe_tsc_register(&tsc_info); +#endif /* CONFIG_IPIPE */ if (!arch_counter_suspend_stop) clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; start_count = arch_timer_read_counter(); diff -ruN linux-org/drivers/clocksource/arm_global_timer.c linux/drivers/clocksource/arm_global_timer.c --- linux-org/drivers/clocksource/arm_global_timer.c 2022-03-25 09:55:37.777468986 +0100 +++ linux/drivers/clocksource/arm_global_timer.c 2022-03-25 10:15:23.345003040 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -49,10 +50,69 @@ * the units for all operations. 
*/ static void __iomem *gt_base; +static unsigned long gt_pbase; +static struct clk *gt_clk; static unsigned long gt_clk_rate; static int gt_ppi; static struct clock_event_device __percpu *gt_evt; +#ifdef CONFIG_IPIPE + +static struct clocksource gt_clocksource; + +static int gt_clockevent_ack(struct clock_event_device *evt); + +static DEFINE_PER_CPU(struct ipipe_timer, gt_itimer); + +static unsigned int refresh_gt_freq(void) +{ + gt_clk_rate = clk_get_rate(gt_clk); + + __clocksource_update_freq_hz(>_clocksource, gt_clk_rate); + + return gt_clk_rate; +} + +static inline void gt_ipipe_cs_setup(void) +{ + struct __ipipe_tscinfo tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING, + .freq = gt_clk_rate, + .counter_vaddr = (unsigned long)gt_base, + .u = { + { + .counter_paddr = gt_pbase, + .mask = 0xffffffff, + } + }, + .refresh_freq = refresh_gt_freq, + }; + + __ipipe_tsc_register(&tsc_info); +} + +static void gt_itimer_ack(void) +{ + struct clock_event_device *evt = this_cpu_ptr(gt_evt); + gt_clockevent_ack(evt); +} + +static inline void gt_ipipe_evt_setup(struct clock_event_device *evt) +{ + evt->ipipe_timer = this_cpu_ptr(>_itimer); + evt->ipipe_timer->irq = evt->irq; + evt->ipipe_timer->ack = gt_itimer_ack; + evt->ipipe_timer->freq = gt_clk_rate; +} + +#else + +static inline void gt_ipipe_cs_setup(void) { } + +static inline void gt_ipipe_evt_setup(struct clock_event_device *evt) { } + +#endif /* CONFIG_IPIPE */ + /* * To get the value from the Global Timer Counter register proceed as follows: * 1. Read the upper 32-bit timer counter register @@ -137,13 +197,11 @@ return 0; } -static irqreturn_t gt_clockevent_interrupt(int irq, void *dev_id) +static int gt_clockevent_ack(struct clock_event_device *evt) { - struct clock_event_device *evt = dev_id; - if (!(readl_relaxed(gt_base + GT_INT_STATUS) & GT_INT_STATUS_EVENT_FLAG)) - return IRQ_NONE; + return IS_ENABLED(CONFIG_IPIPE); /** * ERRATA 740657( Global Timer can send 2 interrupts for @@ -156,10 +214,23 @@ * the Global Timer flag _after_ having incremented * the Comparator register value to a higher value. 
*/ - if (clockevent_state_oneshot(evt)) + if (clockevent_ipipe_stolen(evt) || clockevent_state_oneshot(evt)) gt_compare_set(ULONG_MAX, 0); writel_relaxed(GT_INT_STATUS_EVENT_FLAG, gt_base + GT_INT_STATUS); + + return 1; +} + +static irqreturn_t gt_clockevent_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + if (!clockevent_ipipe_stolen(evt)) { + if (!gt_clockevent_ack(evt)) + return IRQ_NONE; + } + evt->event_handler(evt); return IRQ_HANDLED; @@ -180,6 +251,7 @@ clk->cpumask = cpumask_of(cpu); clk->rating = 300; clk->irq = gt_ppi; + gt_ipipe_evt_setup(clk); clockevents_config_and_register(clk, gt_clk_rate, 1, 0xffffffff); enable_percpu_irq(clk->irq, IRQ_TYPE_NONE); @@ -252,13 +324,14 @@ #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate); #endif + gt_ipipe_cs_setup(); return clocksource_register_hz(&gt_clocksource, gt_clk_rate); } static int __init global_timer_of_register(struct device_node *np) { - struct clk *gt_clk; int err = 0; + struct resource res; /* * In A9 r2p0 the comparators for each processor with the global timer @@ -283,6 +356,11 @@ return -ENXIO; } + if (of_address_to_resource(np, 0, &res)) + res.start = 0; + + gt_pbase = res.start; + gt_clk = of_clk_get(np, 0); if (!IS_ERR(gt_clk)) { err = clk_prepare_enable(gt_clk); diff -ruN linux-org/drivers/clocksource/bcm2835_timer.c linux/drivers/clocksource/bcm2835_timer.c --- linux-org/drivers/clocksource/bcm2835_timer.c 2022-03-25 09:55:37.777468986 +0100 +++ linux/drivers/clocksource/bcm2835_timer.c 2022-03-25 10:15:23.345003040 +0100 @@ -29,6 +29,9 @@ #include #include #include +#include +#include +#include #include @@ -39,6 +42,7 @@ #define MAX_TIMER 3 #define DEFAULT_TIMER 3 + struct bcm2835_timer { void __iomem *control; void __iomem *compare; @@ -46,9 +50,53 @@ struct clock_event_device evt; struct irqaction act; }; - static void __iomem *system_clock __read_mostly; +#ifdef CONFIG_IPIPE + +static void __iomem *t_base; +static unsigned long t_pbase; + +static inline void bcm2835_ipipe_cs_setup(unsigned int freq) +{ + struct __ipipe_tscinfo tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING, + .freq = freq, + .counter_vaddr = (unsigned long)t_base + 0x04, + .u = { + { + .counter_paddr = t_pbase + 0x04, + .mask = 0xffffffff, + } + }, + }; + + __ipipe_tsc_register(&tsc_info); +} + +static struct ipipe_timer bcm2835_itimer; + +static void bcm2835_itimer_ack(void) +{ + struct bcm2835_timer *timer = container_of(bcm2835_itimer.host_timer, + struct bcm2835_timer, evt); + writel(timer->match_mask, timer->control); +} + +static inline void bcm2835_ipipe_evt_setup(struct clock_event_device *evt, + int freq) +{ + evt->ipipe_timer = &bcm2835_itimer; + evt->ipipe_timer->irq = evt->irq; + evt->ipipe_timer->ack = bcm2835_itimer_ack; + evt->ipipe_timer->freq = freq; +} + +#else +static inline void bcm2835_ipipe_cs_setup(unsigned int freq) { } +static inline void bcm2835_ipipe_evt_setup(struct clock_event_device *evt, int freq) { } +#endif /* CONFIG_IPIPE */ + static u64 notrace bcm2835_sched_read(void) { return readl_relaxed(system_clock); @@ -59,8 +107,7 @@ { struct bcm2835_timer *timer = container_of(evt_dev, struct bcm2835_timer, evt); - writel_relaxed(readl_relaxed(system_clock) + event, - timer->compare); + writel_relaxed(readl_relaxed(system_clock) + event, timer->compare); return 0; } @@ -68,16 +115,23 @@ { struct bcm2835_timer *timer = dev_id; void (*event_handler)(struct clock_event_device *); - if (readl_relaxed(timer->control) & timer->match_mask) { -
writel_relaxed(timer->match_mask, timer->control); - event_handler = ACCESS_ONCE(timer->evt.event_handler); - if (event_handler) - event_handler(&timer->evt); + if (clockevent_ipipe_stolen(&timer->evt)) { + goto handle; + } + + if (readl_relaxed(timer->control) & timer->match_mask) { + writel_relaxed(timer->match_mask, timer->control); + handle: + event_handler = ACCESS_ONCE(timer->evt.event_handler); + __ipipe_tsc_update(); + if (event_handler) { + event_handler(&timer->evt); + } return IRQ_HANDLED; } else { - return IRQ_NONE; - } + return IRQ_NONE; + } } static int __init bcm2835_timer_init(struct device_node *node) @@ -93,6 +147,17 @@ return -ENXIO; } + if (IS_ENABLED(CONFIG_IPIPE)) { + struct resource res; + int ret; + + ret = of_address_to_resource(node, 0, &res); + if (ret) + res.start = 0; + t_base = base; + t_pbase = res.start; + } + ret = of_property_read_u32(node, "clock-frequency", &freq); if (ret) { pr_err("Can't read clock-frequency\n"); @@ -127,11 +192,22 @@ timer->evt.set_next_event = bcm2835_time_set_next_event; timer->evt.cpumask = cpumask_of(0); timer->act.name = node->name; - timer->act.flags = IRQF_TIMER | IRQF_SHARED; + timer->act.flags = IRQF_TIMER; timer->act.dev_id = timer; timer->act.handler = bcm2835_time_interrupt; - ret = setup_irq(irq, &timer->act); + if (IS_ENABLED(CONFIG_IPIPE)) { + bcm2835_ipipe_cs_setup(freq); + bcm2835_ipipe_evt_setup(&timer->evt, freq); + timer->evt.ipipe_timer = &bcm2835_itimer; + timer->evt.ipipe_timer->irq = irq; + timer->evt.ipipe_timer->ack = bcm2835_itimer_ack; + timer->evt.ipipe_timer->freq = freq; + } else { + timer->act.flags |= IRQF_SHARED; + } + + ret = setup_irq(irq, &timer->act); if (ret) { pr_err("Can't set up timer IRQ\n"); goto err_iounmap; diff -ruN linux-org/drivers/clocksource/dw_apb_timer.c linux/drivers/clocksource/dw_apb_timer.c --- linux-org/drivers/clocksource/dw_apb_timer.c 2022-03-25 09:55:37.777468986 +0100 +++ linux/drivers/clocksource/dw_apb_timer.c 2022-03-25 10:15:23.345003040 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -384,7 +385,7 @@ */ struct dw_apb_clocksource * dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, - unsigned long freq) + unsigned long phys, unsigned long freq) { struct dw_apb_clocksource *dw_cs = kzalloc(sizeof(*dw_cs), GFP_KERNEL); @@ -399,10 +400,22 @@ dw_cs->cs.mask = CLOCKSOURCE_MASK(32); dw_cs->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; dw_cs->cs.resume = apbt_restart_clocksource; + dw_cs->phys = phys; return dw_cs; } +#ifdef CONFIG_IPIPE +static struct __ipipe_tscinfo apb_tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN, + .u = { + .dec = { + .mask = 0xffffffffU, + }, + }, +}; +#endif + /** * dw_apb_clocksource_register() - register the APB clocksource. 
* @@ -411,6 +424,12 @@ void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs) { clocksource_register_hz(&dw_cs->cs, dw_cs->timer.freq); +#ifdef CONFIG_IPIPE + apb_tsc_info.u.dec.counter = (void *)(dw_cs->phys + APBTMR_N_CURRENT_VALUE); + apb_tsc_info.counter_vaddr = (unsigned long)dw_cs->timer.base + APBTMR_N_CURRENT_VALUE; + apb_tsc_info.freq = dw_cs->timer.freq; + __ipipe_tsc_register(&apb_tsc_info); +#endif } /** diff -ruN linux-org/drivers/clocksource/dw_apb_timer_of.c linux/drivers/clocksource/dw_apb_timer_of.c --- linux-org/drivers/clocksource/dw_apb_timer_of.c 2022-03-25 09:55:37.777468986 +0100 +++ linux/drivers/clocksource/dw_apb_timer_of.c 2022-03-25 10:15:23.345003040 +0100 @@ -25,16 +25,20 @@ #include static void __init timer_get_base_and_rate(struct device_node *np, - void __iomem **base, u32 *rate) + void __iomem **base, unsigned long *phys, + u32 *rate) { struct clk *timer_clk; + struct resource res; struct clk *pclk; *base = of_iomap(np, 0); - if (!*base) + if (!*base || of_address_to_resource(np, 0, &res)) panic("Unable to map regs for %s", np->name); + *phys = res.start; + /* * Not all implementations use a periphal clock, so don't panic * if it's not present @@ -64,13 +68,14 @@ { void __iomem *iobase; struct dw_apb_clock_event_device *ced; + unsigned long phys; u32 irq, rate; irq = irq_of_parse_and_map(event_timer, 0); if (irq == 0) panic("No IRQ for clock event timer"); - timer_get_base_and_rate(event_timer, &iobase, &rate); + timer_get_base_and_rate(event_timer, &iobase, &phys, &rate); ced = dw_apb_clockevent_init(0, event_timer->name, 300, iobase, irq, rate); @@ -87,11 +92,12 @@ { void __iomem *iobase; struct dw_apb_clocksource *cs; + unsigned long phys; u32 rate; - timer_get_base_and_rate(source_timer, &iobase, &rate); + timer_get_base_and_rate(source_timer, &iobase, &phys, &rate); - cs = dw_apb_clocksource_init(300, source_timer->name, iobase, rate); + cs = dw_apb_clocksource_init(300, source_timer->name, iobase, phys, rate); if (!cs) panic("Unable to initialise clocksource device"); @@ -120,11 +126,12 @@ static void __init init_sched_clock(void) { struct device_node *sched_timer; + unsigned long phys; sched_timer = of_find_matching_node(NULL, sptimer_ids); if (sched_timer) { timer_get_base_and_rate(sched_timer, &sched_io_base, - &sched_rate); + &phys, &sched_rate); of_node_put(sched_timer); } diff -ruN linux-org/drivers/clocksource/timer-imx-gpt.c linux/drivers/clocksource/timer-imx-gpt.c --- linux-org/drivers/clocksource/timer-imx-gpt.c 2022-03-25 09:55:37.781468971 +0100 +++ linux/drivers/clocksource/timer-imx-gpt.c 2022-03-25 10:15:23.345003040 +0100 @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include /* @@ -77,6 +79,9 @@ struct imx_timer { enum imx_gpt_type type; +#ifdef CONFIG_IPIPE + unsigned long pbase; +#endif void __iomem *base; int irq; struct clk *clk_per; @@ -281,6 +286,30 @@ return 0; } +#ifdef CONFIG_IPIPE + +static struct imx_timer *global_imx_timer; + +static void mxc_timer_ack(void) +{ + global_imx_timer->gpt->gpt_irq_acknowledge(global_imx_timer); +} + +static struct __ipipe_tscinfo tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING, + .u = { + { + .mask = 0xffffffff, + }, + }, +}; + +static struct ipipe_timer mxc_itimer = { + .ack = mxc_timer_ack, +}; + +#endif + /* * IRQ handler for the timer */ @@ -292,7 +321,8 @@ tstat = readl_relaxed(imxtm->base + imxtm->gpt->reg_tstat); - imxtm->gpt->gpt_irq_acknowledge(imxtm); + if (!clockevent_ipipe_stolen(ced)) + imxtm->gpt->gpt_irq_acknowledge(imxtm); 
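/*
 * [Editorial sketch -- not part of the patch.]  The i.MX GPT hunk above is
 * one instance of a pattern this patch applies to every clock event driver:
 * when the clockevent has been "stolen" by a co-kernel, the pipeline has
 * already acknowledged the hardware through ipipe_timer->ack, so the Linux
 * handler must skip its own ack.  A minimal sketch of that shape follows;
 * the my_* names and the register access are hypothetical, the ipipe_*
 * types and helpers are the ones this patch introduces, and the header
 * names are assumptions.
 */
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/ipipe.h>		/* assumed location of the I-pipe API */
#include <linux/ipipe_tickdev.h>	/* assumed: struct ipipe_timer */

static struct ipipe_timer my_itimer;

static void my_timer_hw_ack(void)
{
	/* hypothetical: clear the timer's interrupt status register here */
}

static irqreturn_t my_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *ced = dev_id;

	/* If the head stage owns the timer, it acked the hardware already. */
	if (!clockevent_ipipe_stolen(ced))
		my_timer_hw_ack();

	ced->event_handler(ced);
	return IRQ_HANDLED;
}

static void my_timer_register(struct clock_event_device *ced, int irq,
			      unsigned long freq)
{
	my_itimer.irq = irq;
	my_itimer.ack = my_timer_hw_ack;
	my_itimer.freq = freq;
	ced->ipipe_timer = &my_itimer;	/* field added under CONFIG_IPIPE */
	clockevents_config_and_register(ced, freq, 0xff, 0xffffffff);
}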
ced->event_handler(ced); @@ -313,6 +343,9 @@ ced->rating = 200; ced->cpumask = cpumask_of(0); ced->irq = imxtm->irq; +#ifdef CONFIG_IPIPE + ced->ipipe_timer = &mxc_itimer; +#endif clockevents_config_and_register(ced, clk_get_rate(imxtm->clk_per), 0xff, 0xfffffffe); @@ -452,6 +485,17 @@ if (ret) return ret; +#ifdef CONFIG_IPIPE + tsc_info.u.counter_paddr = imxtm->pbase + imxtm->gpt->reg_tcn; + tsc_info.counter_vaddr = (unsigned long)imxtm->base + imxtm->gpt->reg_tcn; + tsc_info.freq = clk_get_rate(imxtm->clk_per); + __ipipe_tsc_register(&tsc_info); + mxc_itimer.irq = imxtm->irq; + mxc_itimer.freq = clk_get_rate(imxtm->clk_per); + mxc_itimer.min_delay_ticks = ipipe_timer_ns2ticks(&mxc_itimer, 2000); + global_imx_timer = imxtm; +#endif /* CONFIG_IPIPE */ + return mxc_clockevent_init(imxtm); } @@ -467,6 +511,9 @@ imxtm->base = ioremap(pbase, SZ_4K); BUG_ON(!imxtm->base); +#ifdef CONFIG_IPIPE + imxtm->pbase = pbase; +#endif imxtm->type = type; imxtm->irq = irq; @@ -478,6 +525,7 @@ { struct imx_timer *imxtm; static int initialized; + struct resource res; int ret; /* Support one instance only */ @@ -496,6 +544,13 @@ if (imxtm->irq <= 0) return -EINVAL; + if (of_address_to_resource(np, 0, &res)) + res.start = 0; + +#ifdef CONFIG_IPIPE + imxtm->pbase = res.start; +#endif + imxtm->clk_ipg = of_clk_get_by_name(np, "ipg"); /* Try osc_per first, and fall back to per otherwise */ diff -ruN linux-org/drivers/clocksource/timer-sp804.c linux/drivers/clocksource/timer-sp804.c --- linux-org/drivers/clocksource/timer-sp804.c 2022-03-25 09:55:37.781468971 +0100 +++ linux/drivers/clocksource/timer-sp804.c 2022-03-25 10:15:23.345003040 +0100 @@ -29,11 +29,25 @@ #include #include #include +#include +#include +#include #include #include "timer-sp.h" +#ifdef CONFIG_IPIPE +static struct __ipipe_tscinfo tsc_info = { + .type = IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN, + .u = { + { + .mask = 0xffffffff, + }, + }, +}; +#endif /* CONFIG_IPIPE */ + static long __init sp804_get_clock_rate(struct clk *clk) { long rate; @@ -78,6 +92,7 @@ } int __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, + unsigned long phys, const char *name, struct clk *clk, int use_sched_clock) @@ -112,6 +127,12 @@ sched_clock_register(sp804_read, 32, rate); } +#ifdef CONFIG_IPIPE + tsc_info.freq = rate; + tsc_info.counter_vaddr = (unsigned long)base + TIMER_VALUE; + tsc_info.u.counter_paddr = phys + TIMER_VALUE; + __ipipe_tsc_register(&tsc_info); +#endif return 0; } @@ -226,6 +247,7 @@ u32 irq_num = 0; struct clk *clk1, *clk2; const char *name = of_get_property(np, "compatible", NULL); + struct resource res; base = of_iomap(np, 0); if (!base) @@ -259,6 +281,9 @@ if (irq <= 0) goto err; + if (of_address_to_resource(np, 0, &res)) + res.start = 0; + of_property_read_u32(np, "arm,sp804-has-irq", &irq_num); if (irq_num == 2) { @@ -266,7 +291,7 @@ if (ret) goto err; - ret = __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1); + ret = __sp804_clocksource_and_sched_clock_init(base, res.start, name, clk1, 1); if (ret) goto err; } else { @@ -276,7 +301,7 @@ goto err; ret =__sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE, - name, clk2, 1); + res.start, name, clk2, 1); if (ret) goto err; } @@ -296,6 +321,7 @@ int irq, ret = -EINVAL; const char *name = of_get_property(np, "compatible", NULL); struct clk *clk; + struct resource res; base = of_iomap(np, 0); if (!base) { @@ -315,8 +341,11 @@ if (init_count == 2 || !of_device_is_available(np)) goto err; + if (of_address_to_resource(np, 0, &res)) + res.start = 0; + if (!init_count) 
{ - ret = __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); + ret = __sp804_clocksource_and_sched_clock_init(base, res.start, name, clk, 0); if (ret) goto err; } else { diff -ruN linux-org/drivers/cpuidle/cpuidle.c linux/drivers/cpuidle/cpuidle.c --- linux-org/drivers/cpuidle/cpuidle.c 2022-03-25 09:55:37.797468911 +0100 +++ linux/drivers/cpuidle/cpuidle.c 2022-03-25 10:15:23.353003010 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -196,6 +197,15 @@ s64 diff; /* + * A co-kernel running on the head stage of the IRQ pipeline + * may deny this switch. + */ + if (!ipipe_enter_cpuidle(dev, target_state)) { + ipipe_exit_cpuidle(); + return -EBUSY; + } + + /* * Tell the time framework to switch to a broadcast timer because our * local timer will be shut down. If a local timer is used from another * CPU as a broadcast timer, this call may fail if it is not available. @@ -255,6 +265,8 @@ dev->last_residency = 0; } + ipipe_exit_cpuidle(); + return entered_state; } diff -ruN linux-org/drivers/cpuidle/Kconfig linux/drivers/cpuidle/Kconfig --- linux-org/drivers/cpuidle/Kconfig 2022-03-25 09:55:37.793468925 +0100 +++ linux/drivers/cpuidle/Kconfig 2022-03-25 10:15:23.353003010 +0100 @@ -3,6 +3,7 @@ config CPU_IDLE bool "CPU idle PM support" default y if ACPI || PPC_PSERIES + depends on !(ARCH_OMAP4 && IPIPE) select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE) select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE) help diff -ruN linux-org/drivers/gpio/gpio-davinci.c linux/drivers/gpio/gpio-davinci.c --- linux-org/drivers/gpio/gpio-davinci.c 2022-03-25 09:55:37.913468474 +0100 +++ linux/drivers/gpio/gpio-davinci.c 2022-03-25 10:15:23.345003040 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include struct davinci_gpio_regs { u32 dir; @@ -301,7 +302,7 @@ .irq_enable = gpio_irq_enable, .irq_disable = gpio_irq_disable, .irq_set_type = gpio_irq_type, - .flags = IRQCHIP_SET_TYPE_MASKED, + .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_PIPELINE_SAFE, }; static void gpio_irq_handler(struct irq_desc *desc) @@ -344,7 +345,7 @@ */ hw_irq = (bank_num / 2) * 32 + bit; - generic_handle_irq( + ipipe_handle_demuxed_irq( irq_find_mapping(d->irq_domain, hw_irq)); } } diff -ruN linux-org/drivers/gpio/gpio-mvebu.c linux/drivers/gpio/gpio-mvebu.c --- linux-org/drivers/gpio/gpio-mvebu.c 2022-03-25 09:55:37.921468444 +0100 +++ linux/drivers/gpio/gpio-mvebu.c 2022-03-25 10:15:23.345003040 +0100 @@ -50,6 +50,7 @@ #include #include #include +#include #include "gpiolib.h" @@ -392,10 +393,11 @@ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct mvebu_gpio_chip *mvchip = gc->private; u32 mask = d->mask; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); mvebu_gpio_write_edge_cause(mvchip, ~mask); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } static void mvebu_gpio_edge_irq_mask(struct irq_data *d) @@ -404,11 +406,12 @@ struct mvebu_gpio_chip *mvchip = gc->private; struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); ct->mask_cache_priv &= ~mask; mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } static void mvebu_gpio_edge_irq_unmask(struct irq_data *d) @@ -417,11 +420,12 @@ struct mvebu_gpio_chip *mvchip = gc->private; struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); ct->mask_cache_priv |= mask; 
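/*
 * [Editorial sketch -- not part of the patch.]  The mvebu and AIC5 hunks in
 * this patch convert every irq_chip_generic callback the same way: with the
 * pipeline, irq_gc_lock() also masks hard IRQs and returns the saved flags,
 * which must be handed back to irq_gc_unlock().  The callback below only
 * illustrates that mechanical rewrite; the chip it would belong to is
 * hypothetical.
 */
#include <linux/irq.h>

static void demo_gc_mask(struct irq_data *d)
{
	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
	struct irq_chip_type *ct = irq_data_get_chip_type(d);
	unsigned long flags;

	flags = irq_gc_lock(gc);	/* was: irq_gc_lock(gc);   */
	ct->mask_cache_priv &= ~d->mask;
	irq_reg_writel(gc, ct->mask_cache_priv, ct->regs.mask);
	irq_gc_unlock(gc, flags);	/* was: irq_gc_unlock(gc); */
}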
mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } static void mvebu_gpio_level_irq_mask(struct irq_data *d) @@ -430,11 +434,12 @@ struct mvebu_gpio_chip *mvchip = gc->private; struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); ct->mask_cache_priv &= ~mask; mvebu_gpio_write_level_mask(mvchip, ct->mask_cache_priv); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } static void mvebu_gpio_level_irq_unmask(struct irq_data *d) @@ -443,11 +448,12 @@ struct mvebu_gpio_chip *mvchip = gc->private; struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); ct->mask_cache_priv |= mask; mvebu_gpio_write_level_mask(mvchip, ct->mask_cache_priv); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } /***************************************************************************** @@ -581,7 +587,7 @@ polarity); } - generic_handle_irq(irq); + ipipe_handle_demuxed_irq(irq); } chained_irq_exit(chip, desc); @@ -1228,6 +1234,7 @@ ct->chip.irq_unmask = mvebu_gpio_level_irq_unmask; ct->chip.irq_set_type = mvebu_gpio_irq_set_type; ct->chip.name = mvchip->chip.label; + ct->chip.flags = IRQCHIP_PIPELINE_SAFE; ct = &gc->chip_types[1]; ct->type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING; @@ -1237,6 +1244,7 @@ ct->chip.irq_set_type = mvebu_gpio_irq_set_type; ct->handler = handle_edge_irq; ct->chip.name = mvchip->chip.label; + ct->chip.flags = IRQCHIP_PIPELINE_SAFE; /* * Setup the interrupt handlers. Each chip can have up to 4 diff -ruN linux-org/drivers/gpio/gpio-mxc.c linux/drivers/gpio/gpio-mxc.c --- linux-org/drivers/gpio/gpio-mxc.c 2022-03-25 09:55:37.921468444 +0100 +++ linux/drivers/gpio/gpio-mxc.c 2022-03-25 10:15:23.345003040 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include enum mxc_gpio_hwtype { IMX1_GPIO, /* runs on i.mx1 */ @@ -267,7 +268,7 @@ if (port->both_edges & (1 << irqoffset)) mxc_flip_edge(port, irqoffset); - generic_handle_irq(irq_find_mapping(port->domain, irqoffset)); + ipipe_handle_demuxed_irq(irq_find_mapping(port->domain, irqoffset)); irq_stat &= ~(1 << irqoffset); } @@ -360,7 +361,7 @@ ct->chip.irq_unmask = irq_gc_mask_set_bit; ct->chip.irq_set_type = gpio_set_irq_type; ct->chip.irq_set_wake = gpio_set_wake_irq; - ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND; + ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE; ct->regs.ack = GPIO_ISR; ct->regs.mask = GPIO_IMR; diff -ruN linux-org/drivers/gpio/gpio-omap.c linux/drivers/gpio/gpio-omap.c --- linux-org/drivers/gpio/gpio-omap.c 2022-03-25 09:55:37.921468444 +0100 +++ linux/drivers/gpio/gpio-omap.c 2022-03-25 10:15:23.345003040 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #define OFF_MODE 1 @@ -58,7 +59,11 @@ u32 saved_datain; u32 level_mask; u32 toggle_mask; +#ifdef CONFIG_IPIPE + ipipe_spinlock_t lock; +#else raw_spinlock_t lock; +#endif raw_spinlock_t wa_lock; struct gpio_chip chip; struct clk *dbck; @@ -681,20 +686,17 @@ * line's interrupt handler has been run, we may miss some nested * interrupts. 
*/ -static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) +static void __omap_gpio_irq_handler(struct gpio_bank *bank) { void __iomem *isr_reg = NULL; u32 enabled, isr, level_mask; unsigned int bit; - struct gpio_bank *bank = gpiobank; unsigned long wa_lock_flags; unsigned long lock_flags; isr_reg = bank->base + bank->regs->irqstatus; if (WARN_ON(!isr_reg)) - goto exit; - - pm_runtime_get_sync(bank->chip.parent); + return; while (1) { raw_spin_lock_irqsave(&bank->lock, lock_flags); @@ -737,18 +739,38 @@ raw_spin_lock_irqsave(&bank->wa_lock, wa_lock_flags); - generic_handle_irq(irq_find_mapping(bank->chip.irqdomain, + ipipe_handle_demuxed_irq(irq_find_mapping(bank->chip.irqdomain, bit)); raw_spin_unlock_irqrestore(&bank->wa_lock, wa_lock_flags); } } -exit: +} + +#ifdef CONFIG_IPIPE + +static void omap_gpio_irq_handler(struct irq_desc *d) +{ + struct gpio_bank *bank = irq_desc_get_handler_data(d); + __omap_gpio_irq_handler(bank); +} + +#else + +static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) +{ + struct gpio_bank *bank = gpiobank; + + pm_runtime_get_sync(bank->chip.parent); + __omap_gpio_irq_handler(bank); pm_runtime_put(bank->chip.parent); + return IRQ_HANDLED; } +#endif + static unsigned int omap_gpio_irq_startup(struct irq_data *d) { struct gpio_bank *bank = omap_irq_data_get_bank(d); @@ -830,6 +852,19 @@ raw_spin_unlock_irqrestore(&bank->lock, flags); } +static void omap_gpio_mask_ack_irq(struct irq_data *d) +{ + struct gpio_bank *bank = omap_irq_data_get_bank(d); + unsigned offset = d->hwirq; + unsigned long flags; + + raw_spin_lock_irqsave(&bank->lock, flags); + omap_set_gpio_irqenable(bank, offset, 0); + omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE); + omap_clear_gpio_irqstatus(bank, offset); + raw_spin_unlock_irqrestore(&bank->lock, flags); +} + static void omap_gpio_unmask_irq(struct irq_data *d) { struct gpio_bank *bank = omap_irq_data_get_bank(d); @@ -1128,11 +1163,16 @@ gpiochip_set_chained_irqchip(&bank->chip, irqc, bank->irq, NULL); +#ifdef CONFIG_IPIPE + irq_set_chained_handler_and_data(bank->irq, + omap_gpio_irq_handler, bank); +#else ret = devm_request_irq(bank->chip.parent, bank->irq, omap_gpio_irq_handler, 0, dev_name(bank->chip.parent), bank); if (ret) gpiochip_remove(&bank->chip); +#endif return ret; } @@ -1170,13 +1210,14 @@ irqc->irq_shutdown = omap_gpio_irq_shutdown, irqc->irq_ack = omap_gpio_ack_irq, irqc->irq_mask = omap_gpio_mask_irq, + irqc->irq_mask_ack = omap_gpio_mask_ack_irq, irqc->irq_unmask = omap_gpio_unmask_irq, irqc->irq_set_type = omap_gpio_irq_type, irqc->irq_set_wake = omap_gpio_wake_enable, irqc->irq_bus_lock = omap_gpio_irq_bus_lock, irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock, irqc->name = dev_name(&pdev->dev); - irqc->flags = IRQCHIP_MASK_ON_SUSPEND; + irqc->flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE; bank->irq = platform_get_irq(pdev, 0); if (bank->irq <= 0) { diff -ruN linux-org/drivers/gpio/gpio-pl061.c linux/drivers/gpio/gpio-pl061.c --- linux-org/drivers/gpio/gpio-pl061.c 2022-03-25 09:55:37.921468444 +0100 +++ linux/drivers/gpio/gpio-pl061.c 2022-03-25 10:15:23.345003040 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #define GPIODIR 0x400 #define GPIOIS 0x404 @@ -50,7 +51,11 @@ #endif struct pl061 { +#ifdef CONFIG_IPIPE + ipipe_spinlock_t lock; +#else raw_spinlock_t lock; +#endif void __iomem *base; struct gpio_chip gc; @@ -221,8 +226,8 @@ pending = readb(pl061->base + GPIOMIS); if (pending) { for_each_set_bit(offset, &pending, PL061_GPIO_NR) - 
generic_handle_irq(irq_find_mapping(gc->irqdomain, - offset)); + ipipe_handle_demuxed_irq(irq_find_mapping(gc->irqdomain, + offset)); } chained_irq_exit(irqchip, desc); @@ -233,6 +238,22 @@ struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct pl061 *pl061 = gpiochip_get_data(gc); u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR); + unsigned long flags; + u8 gpioie; + + raw_spin_lock_irqsave(&pl061->lock, flags); + gpioie = readb(pl061->base + GPIOIE) & ~mask; + writeb(gpioie, pl061->base + GPIOIE); + ipipe_lock_irq(d->irq); + raw_spin_unlock_irqrestore(&pl061->lock, flags); +} + +#ifdef CONFIG_IPIPE +static void pl061_irq_mask_ack(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct pl061 *pl061 = gpiochip_get_data(gc); + u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR); u8 gpioie; raw_spin_lock(&pl061->lock); @@ -240,6 +261,7 @@ writeb(gpioie, pl061->base + GPIOIE); raw_spin_unlock(&pl061->lock); } +#endif static void pl061_irq_unmask(struct irq_data *d) { @@ -288,6 +310,10 @@ .irq_unmask = pl061_irq_unmask, .irq_set_type = pl061_irq_type, .irq_set_wake = pl061_irq_set_wake, +#ifdef CONFIG_IPIPE + .irq_mask_ack = pl061_irq_mask_ack, + .flags = IRQCHIP_PIPELINE_SAFE, +#endif }; static int pl061_probe(struct amba_device *adev, const struct amba_id *id) diff -ruN linux-org/drivers/gpio/gpio-zynq.c linux/drivers/gpio/gpio-zynq.c --- linux-org/drivers/gpio/gpio-zynq.c 2022-03-25 09:55:37.925468429 +0100 +++ linux/drivers/gpio/gpio-zynq.c 2022-03-25 10:15:23.349003026 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,8 @@ struct gpio_regs context; }; +static IPIPE_DEFINE_RAW_SPINLOCK(zynq_gpio_lock); + /** * struct zynq_platform_data - zynq gpio platform data structure * @label: string to store in gpio->label @@ -302,6 +305,7 @@ u32 reg; unsigned int bank_num, bank_pin_num; struct zynq_gpio *gpio = gpiochip_get_data(chip); + unsigned long flags; zynq_gpio_get_bank_pin(pin, &bank_num, &bank_pin_num, gpio); @@ -313,10 +317,12 @@ (bank_pin_num == 7 || bank_pin_num == 8)) return -EINVAL; + raw_spin_lock_irqsave(&zynq_gpio_lock, flags); /* clear the bit in direction mode reg to set the pin as input */ reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); reg &= ~BIT(bank_pin_num); writel_relaxed(reg, gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); + raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); return 0; } @@ -339,9 +345,11 @@ u32 reg; unsigned int bank_num, bank_pin_num; struct zynq_gpio *gpio = gpiochip_get_data(chip); + unsigned long flags; zynq_gpio_get_bank_pin(pin, &bank_num, &bank_pin_num, gpio); + raw_spin_lock_irqsave(&zynq_gpio_lock, flags); /* set the GPIO pin as output */ reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); reg |= BIT(bank_pin_num); @@ -351,6 +359,7 @@ reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_OUTEN_OFFSET(bank_num)); reg |= BIT(bank_pin_num); writel_relaxed(reg, gpio->base_addr + ZYNQ_GPIO_OUTEN_OFFSET(bank_num)); + raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); /* set the state of the pin */ zynq_gpio_set_value(chip, pin, state); @@ -370,11 +379,15 @@ unsigned int device_pin_num, bank_num, bank_pin_num; struct zynq_gpio *gpio = gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); + unsigned long flags; device_pin_num = irq_data->hwirq; zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); + raw_spin_lock_irqsave(&zynq_gpio_lock, flags); + ipipe_lock_irq(irq_data->irq); 
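/*
 * [Editorial sketch -- not part of the patch.]  The GPIO drivers above pair
 * the hardware mask/unmask with ipipe_lock_irq()/ipipe_unlock_irq(), so the
 * pipeline stops forwarding a line to the root domain while Linux has it
 * masked and resumes delivery once it is unmasked.  The demo_* helpers
 * stand in for the driver's register accesses and are hypothetical; the
 * header name for the I-pipe API is an assumption.
 */
#include <linux/irq.h>
#include <linux/ipipe.h>

static IPIPE_DEFINE_RAW_SPINLOCK(demo_gpio_lock);

static void demo_hw_mask(unsigned long hw)   { /* write the interrupt-disable register */ }
static void demo_hw_unmask(unsigned long hw) { /* write the interrupt-enable register */ }

static void demo_gpio_irq_mask(struct irq_data *d)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_gpio_lock, flags);
	demo_hw_mask(d->hwirq);
	ipipe_lock_irq(d->irq);
	raw_spin_unlock_irqrestore(&demo_gpio_lock, flags);
}

static void demo_gpio_irq_unmask(struct irq_data *d)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_gpio_lock, flags);
	demo_hw_unmask(d->hwirq);
	ipipe_unlock_irq(d->irq);
	raw_spin_unlock_irqrestore(&demo_gpio_lock, flags);
}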
writel_relaxed(BIT(bank_pin_num), gpio->base_addr + ZYNQ_GPIO_INTDIS_OFFSET(bank_num)); + raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); } /** @@ -391,11 +404,15 @@ unsigned int device_pin_num, bank_num, bank_pin_num; struct zynq_gpio *gpio = gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); + unsigned long flags; device_pin_num = irq_data->hwirq; zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); + raw_spin_lock_irqsave(&zynq_gpio_lock, flags); writel_relaxed(BIT(bank_pin_num), gpio->base_addr + ZYNQ_GPIO_INTEN_OFFSET(bank_num)); + ipipe_unlock_irq(irq_data->irq); + raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); } /** @@ -533,28 +550,68 @@ return 0; } +#ifdef CONFIG_IPIPE + +static void zynq_gpio_hold_irq(struct irq_data *irq_data) +{ + unsigned int device_pin_num, bank_num, bank_pin_num; + struct zynq_gpio *gpio = + gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); + + device_pin_num = irq_data->hwirq; + zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); + raw_spin_lock(&zynq_gpio_lock); + writel_relaxed(BIT(bank_pin_num), + gpio->base_addr + ZYNQ_GPIO_INTDIS_OFFSET(bank_num)); + writel_relaxed(BIT(bank_pin_num), + gpio->base_addr + ZYNQ_GPIO_INTSTS_OFFSET(bank_num)); + raw_spin_unlock(&zynq_gpio_lock); +} + +static void zynq_gpio_release_irq(struct irq_data *irq_data) +{ + unsigned int device_pin_num, bank_num, bank_pin_num; + struct zynq_gpio *gpio = + gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); + + device_pin_num = irq_data->hwirq; + zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); + writel_relaxed(BIT(bank_pin_num), + gpio->base_addr + ZYNQ_GPIO_INTEN_OFFSET(bank_num)); +} + +#endif /* CONFIG_IPIPE */ + /* irq chip descriptor */ static struct irq_chip zynq_gpio_level_irqchip = { - .name = DRIVER_NAME, + .name = DRIVER_NAME "-level", .irq_enable = zynq_gpio_irq_enable, .irq_eoi = zynq_gpio_irq_ack, +#ifdef CONFIG_IPIPE + .irq_hold = zynq_gpio_hold_irq, + .irq_release = zynq_gpio_release_irq, +#endif .irq_mask = zynq_gpio_irq_mask, .irq_unmask = zynq_gpio_irq_unmask, .irq_set_type = zynq_gpio_set_irq_type, .irq_set_wake = zynq_gpio_set_wake, .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | - IRQCHIP_MASK_ON_SUSPEND, + IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, }; static struct irq_chip zynq_gpio_edge_irqchip = { - .name = DRIVER_NAME, + .name = DRIVER_NAME "-edge", .irq_enable = zynq_gpio_irq_enable, +#ifdef CONFIG_IPIPE + .irq_mask_ack = zynq_gpio_hold_irq, +#else .irq_ack = zynq_gpio_irq_ack, +#endif .irq_mask = zynq_gpio_irq_mask, .irq_unmask = zynq_gpio_irq_unmask, .irq_set_type = zynq_gpio_set_irq_type, .irq_set_wake = zynq_gpio_set_wake, - .flags = IRQCHIP_MASK_ON_SUSPEND, + .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, }; static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio, @@ -572,7 +629,7 @@ unsigned int gpio_irq; gpio_irq = irq_find_mapping(irqdomain, offset + bank_offset); - generic_handle_irq(gpio_irq); + ipipe_handle_demuxed_irq(gpio_irq); } } diff -ruN linux-org/drivers/gpu/ipu-v3/ipu-common.c linux/drivers/gpu/ipu-v3/ipu-common.c --- linux-org/drivers/gpu/ipu-v3/ipu-common.c 2022-03-25 09:55:38.613465837 +0100 +++ linux/drivers/gpu/ipu-v3/ipu-common.c 2022-03-25 10:15:23.349003026 +0100 @@ -1080,7 +1080,7 @@ irq = irq_linear_revmap(ipu->domain, regs[i] * 32 + bit); if (irq) - generic_handle_irq(irq); + ipipe_handle_demuxed_irq(irq); } } } @@ -1304,6 +1304,7 @@ ct->chip.irq_ack = irq_gc_ack_set_bit; ct->chip.irq_mask = 
irq_gc_mask_clr_bit; ct->chip.irq_unmask = irq_gc_mask_set_bit; + ct->chip.flags = IRQCHIP_PIPELINE_SAFE; ct->regs.ack = IPU_INT_STAT(i / 32); ct->regs.mask = IPU_INT_CTRL(i / 32); } diff -ruN linux-org/drivers/gpu/ipu-v3/ipu-prv.h linux/drivers/gpu/ipu-v3/ipu-prv.h --- linux-org/drivers/gpu/ipu-v3/ipu-prv.h 2022-03-25 09:55:38.613465837 +0100 +++ linux/drivers/gpu/ipu-v3/ipu-prv.h 2022-03-25 10:15:23.349003026 +0100 @@ -179,7 +179,7 @@ struct device *dev; const struct ipu_devtype *devtype; enum ipuv3_type ipu_type; - spinlock_t lock; + ipipe_spinlock_t lock; struct mutex channel_lock; struct list_head channels; diff -ruN linux-org/drivers/irqchip/irq-atmel-aic5.c linux/drivers/irqchip/irq-atmel-aic5.c --- linux-org/drivers/irqchip/irq-atmel-aic5.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-atmel-aic5.c 2022-03-25 10:15:23.349003026 +0100 @@ -80,7 +80,7 @@ if (!irqstat) irq_reg_writel(bgc, 0, AT91_AIC5_EOICR); else - handle_domain_irq(aic5_domain, irqnr, regs); + ipipe_handle_domain_irq(aic5_domain, irqnr, regs); } static void aic5_mask(struct irq_data *d) @@ -88,16 +88,18 @@ struct irq_domain *domain = d->domain; struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + unsigned long flags; /* * Disable interrupt on AIC5. We always take the lock of the * first irq chip as all chips share the same registers. */ - irq_gc_lock(bgc); + flags = irq_gc_lock(bgc); + ipipe_lock_irq(d->irq); irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); irq_reg_writel(gc, 1, AT91_AIC5_IDCR); gc->mask_cache &= ~d->mask; - irq_gc_unlock(bgc); + irq_gc_unlock(bgc, flags); } static void aic5_unmask(struct irq_data *d) @@ -105,28 +107,59 @@ struct irq_domain *domain = d->domain; struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + unsigned long flags; /* * Enable interrupt on AIC5. We always take the lock of the * first irq chip as all chips share the same registers. 
*/ - irq_gc_lock(bgc); + flags = irq_gc_lock(bgc); irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); irq_reg_writel(gc, 1, AT91_AIC5_IECR); gc->mask_cache |= d->mask; - irq_gc_unlock(bgc); + ipipe_unlock_irq(d->irq); + irq_gc_unlock(bgc, flags); +} + +#ifdef CONFIG_IPIPE + +static void aic5_hold(struct irq_data *d) +{ + struct irq_domain *domain = d->domain; + struct irq_domain_chip_generic *dgc = domain->gc; + struct irq_chip_generic *gc = dgc->gc[0]; + + irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); + irq_reg_writel(gc, 1, AT91_AIC5_IDCR); + irq_reg_writel(gc, 0, AT91_AIC5_EOICR); +} + +static void aic5_release(struct irq_data *d) +{ + struct irq_domain *domain = d->domain; + struct irq_domain_chip_generic *dgc = domain->gc; + struct irq_chip_generic *gc = dgc->gc[0]; + unsigned long flags; + + flags = irq_gc_lock(gc); + irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); + irq_reg_writel(gc, 1, AT91_AIC5_IECR); + irq_gc_unlock(gc, flags); } +#endif + static int aic5_retrigger(struct irq_data *d) { struct irq_domain *domain = d->domain; struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); + unsigned long flags; /* Enable interrupt on AIC5 */ - irq_gc_lock(bgc); + flags = irq_gc_lock(bgc); irq_reg_writel(bgc, d->hwirq, AT91_AIC5_SSR); irq_reg_writel(bgc, 1, AT91_AIC5_ISCR); - irq_gc_unlock(bgc); + irq_gc_unlock(bgc, flags); return 0; } @@ -135,16 +168,17 @@ { struct irq_domain *domain = d->domain; struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); + unsigned long flags; unsigned int smr; int ret; - irq_gc_lock(bgc); + flags = irq_gc_lock(bgc); irq_reg_writel(bgc, d->hwirq, AT91_AIC5_SSR); smr = irq_reg_readl(bgc, AT91_AIC5_SMR); ret = aic_common_set_type(d, type, &smr); if (!ret) irq_reg_writel(bgc, smr, AT91_AIC5_SMR); - irq_gc_unlock(bgc); + irq_gc_unlock(bgc, flags); return ret; } @@ -160,6 +194,7 @@ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); int i; u32 mask; + unsigned long flags; if (smr_cache) for (i = 0; i < domain->revmap_size; i++) { @@ -167,7 +202,7 @@ smr_cache[i] = irq_reg_readl(bgc, AT91_AIC5_SMR); } - irq_gc_lock(bgc); + flags = irq_gc_lock(bgc); for (i = 0; i < dgc->irqs_per_chip; i++) { mask = 1 << i; if ((mask & gc->mask_cache) == (mask & gc->wake_active)) @@ -179,7 +214,7 @@ else irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); } - irq_gc_unlock(bgc); + irq_gc_unlock(bgc, flags); } static void aic5_resume(struct irq_data *d) @@ -190,8 +225,9 @@ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); int i; u32 mask; + unsigned long flags; - irq_gc_lock(bgc); + flags = irq_gc_lock(bgc); if (smr_cache) { irq_reg_writel(bgc, 0xffffffff, AT91_AIC5_SPU); @@ -215,7 +251,7 @@ else irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); } - irq_gc_unlock(bgc); + irq_gc_unlock(bgc, flags); } static void aic5_pm_shutdown(struct irq_data *d) @@ -224,15 +260,16 @@ struct irq_domain_chip_generic *dgc = domain->gc; struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + unsigned long flags; int i; - irq_gc_lock(bgc); + flags = irq_gc_lock(bgc); for (i = 0; i < dgc->irqs_per_chip; i++) { irq_reg_writel(bgc, i + gc->irq_base, AT91_AIC5_SSR); irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); irq_reg_writel(bgc, 1, AT91_AIC5_ICCR); } - irq_gc_unlock(bgc); + irq_gc_unlock(bgc, flags); } #else #define aic5_suspend NULL @@ -349,6 +386,11 @@ gc->chip_types[0].chip.irq_suspend = aic5_suspend; gc->chip_types[0].chip.irq_resume = aic5_resume; gc->chip_types[0].chip.irq_pm_shutdown = aic5_pm_shutdown; 
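/*
 * [Editorial sketch -- not part of the patch.]  Besides being flagged
 * IRQCHIP_PIPELINE_SAFE, chips such as the AIC5 below also gain
 * irq_hold()/irq_release() handlers: hold silences and completes the line
 * at the controller while its handling is deferred to the root domain, and
 * release re-enables it afterwards.  Everything named demo_* is
 * hypothetical; the callback fields and the flag come from the I-pipe core
 * this patch adds.
 */
#include <linux/irq.h>

static void demo_hw_enable(unsigned long hw)  { /* write the enable register */ }
static void demo_hw_disable(unsigned long hw) { /* write the disable register */ }
static void demo_hw_eoi(unsigned long hw)     { /* write the end-of-interrupt register */ }

static void demo_irq_mask(struct irq_data *d)   { demo_hw_disable(d->hwirq); }
static void demo_irq_unmask(struct irq_data *d) { demo_hw_enable(d->hwirq); }

#ifdef CONFIG_IPIPE
static void demo_irq_hold(struct irq_data *d)
{
	demo_hw_disable(d->hwirq);
	demo_hw_eoi(d->hwirq);
}

static void demo_irq_release(struct irq_data *d)
{
	demo_hw_enable(d->hwirq);
}
#endif

static struct irq_chip demo_chip = {
	.name		= "demo",
	.irq_mask	= demo_irq_mask,
	.irq_unmask	= demo_irq_unmask,
#ifdef CONFIG_IPIPE
	.irq_hold	= demo_irq_hold,
	.irq_release	= demo_irq_release,
	.flags		= IRQCHIP_PIPELINE_SAFE,
#endif
};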
+#ifdef CONFIG_IPIPE + gc->chip_types[0].chip.irq_hold = aic5_hold; + gc->chip_types[0].chip.irq_release = aic5_release; + gc->chip_types[0].chip.flags = IRQCHIP_PIPELINE_SAFE; +#endif } aic5_hw_init(domain); diff -ruN linux-org/drivers/irqchip/irq-atmel-aic.c linux/drivers/irqchip/irq-atmel-aic.c --- linux-org/drivers/irqchip/irq-atmel-aic.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-atmel-aic.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,274 +0,0 @@ -/* - * Atmel AT91 AIC (Advanced Interrupt Controller) driver - * - * Copyright (C) 2004 SAN People - * Copyright (C) 2004 ATMEL - * Copyright (C) Rick Bronson - * Copyright (C) 2014 Free Electrons - * - * Author: Boris BREZILLON - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "irq-atmel-aic-common.h" - -/* Number of irq lines managed by AIC */ -#define NR_AIC_IRQS 32 - -#define AT91_AIC_SMR(n) ((n) * 4) - -#define AT91_AIC_SVR(n) (0x80 + ((n) * 4)) -#define AT91_AIC_IVR 0x100 -#define AT91_AIC_FVR 0x104 -#define AT91_AIC_ISR 0x108 - -#define AT91_AIC_IPR 0x10c -#define AT91_AIC_IMR 0x110 -#define AT91_AIC_CISR 0x114 - -#define AT91_AIC_IECR 0x120 -#define AT91_AIC_IDCR 0x124 -#define AT91_AIC_ICCR 0x128 -#define AT91_AIC_ISCR 0x12c -#define AT91_AIC_EOICR 0x130 -#define AT91_AIC_SPU 0x134 -#define AT91_AIC_DCR 0x138 - -static struct irq_domain *aic_domain; - -static asmlinkage void __exception_irq_entry -aic_handle(struct pt_regs *regs) -{ - struct irq_domain_chip_generic *dgc = aic_domain->gc; - struct irq_chip_generic *gc = dgc->gc[0]; - u32 irqnr; - u32 irqstat; - - irqnr = irq_reg_readl(gc, AT91_AIC_IVR); - irqstat = irq_reg_readl(gc, AT91_AIC_ISR); - - if (!irqstat) - irq_reg_writel(gc, 0, AT91_AIC_EOICR); - else - handle_domain_irq(aic_domain, irqnr, regs); -} - -static int aic_retrigger(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - - /* Enable interrupt on AIC5 */ - irq_gc_lock(gc); - irq_reg_writel(gc, d->mask, AT91_AIC_ISCR); - irq_gc_unlock(gc); - - return 0; -} - -static int aic_set_type(struct irq_data *d, unsigned type) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - unsigned int smr; - int ret; - - smr = irq_reg_readl(gc, AT91_AIC_SMR(d->hwirq)); - ret = aic_common_set_type(d, type, &smr); - if (ret) - return ret; - - irq_reg_writel(gc, smr, AT91_AIC_SMR(d->hwirq)); - - return 0; -} - -#ifdef CONFIG_PM -static void aic_suspend(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - - irq_gc_lock(gc); - irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IDCR); - irq_reg_writel(gc, gc->wake_active, AT91_AIC_IECR); - irq_gc_unlock(gc); -} - -static void aic_resume(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - - irq_gc_lock(gc); - irq_reg_writel(gc, gc->wake_active, AT91_AIC_IDCR); - irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IECR); - irq_gc_unlock(gc); -} - -static void aic_pm_shutdown(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - - irq_gc_lock(gc); - irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR); - irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR); - irq_gc_unlock(gc); -} -#else -#define aic_suspend NULL -#define aic_resume 
NULL -#define aic_pm_shutdown NULL -#endif /* CONFIG_PM */ - -static void __init aic_hw_init(struct irq_domain *domain) -{ - struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0); - int i; - - /* - * Perform 8 End Of Interrupt Command to make sure AIC - * will not Lock out nIRQ - */ - for (i = 0; i < 8; i++) - irq_reg_writel(gc, 0, AT91_AIC_EOICR); - - /* - * Spurious Interrupt ID in Spurious Vector Register. - * When there is no current interrupt, the IRQ Vector Register - * reads the value stored in AIC_SPU - */ - irq_reg_writel(gc, 0xffffffff, AT91_AIC_SPU); - - /* No debugging in AIC: Debug (Protect) Control Register */ - irq_reg_writel(gc, 0, AT91_AIC_DCR); - - /* Disable and clear all interrupts initially */ - irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR); - irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR); - - for (i = 0; i < 32; i++) - irq_reg_writel(gc, i, AT91_AIC_SVR(i)); -} - -static int aic_irq_domain_xlate(struct irq_domain *d, - struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, - unsigned int *out_type) -{ - struct irq_domain_chip_generic *dgc = d->gc; - struct irq_chip_generic *gc; - unsigned long flags; - unsigned smr; - int idx; - int ret; - - if (!dgc) - return -EINVAL; - - ret = aic_common_irq_domain_xlate(d, ctrlr, intspec, intsize, - out_hwirq, out_type); - if (ret) - return ret; - - idx = intspec[0] / dgc->irqs_per_chip; - if (idx >= dgc->num_chips) - return -EINVAL; - - gc = dgc->gc[idx]; - - irq_gc_lock_irqsave(gc, flags); - smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); - aic_common_set_priority(intspec[2], &smr); - irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); - irq_gc_unlock_irqrestore(gc, flags); - - return ret; -} - -static const struct irq_domain_ops aic_irq_ops = { - .map = irq_map_generic_chip, - .xlate = aic_irq_domain_xlate, -}; - -static void __init at91rm9200_aic_irq_fixup(void) -{ - aic_common_rtc_irq_fixup(); -} - -static void __init at91sam9260_aic_irq_fixup(void) -{ - aic_common_rtt_irq_fixup(); -} - -static void __init at91sam9g45_aic_irq_fixup(void) -{ - aic_common_rtc_irq_fixup(); - aic_common_rtt_irq_fixup(); -} - -static const struct of_device_id aic_irq_fixups[] __initconst = { - { .compatible = "atmel,at91rm9200", .data = at91rm9200_aic_irq_fixup }, - { .compatible = "atmel,at91sam9g45", .data = at91sam9g45_aic_irq_fixup }, - { .compatible = "atmel,at91sam9n12", .data = at91rm9200_aic_irq_fixup }, - { .compatible = "atmel,at91sam9rl", .data = at91sam9g45_aic_irq_fixup }, - { .compatible = "atmel,at91sam9x5", .data = at91rm9200_aic_irq_fixup }, - { .compatible = "atmel,at91sam9260", .data = at91sam9260_aic_irq_fixup }, - { .compatible = "atmel,at91sam9261", .data = at91sam9260_aic_irq_fixup }, - { .compatible = "atmel,at91sam9263", .data = at91sam9260_aic_irq_fixup }, - { .compatible = "atmel,at91sam9g20", .data = at91sam9260_aic_irq_fixup }, - { /* sentinel */ }, -}; - -static int __init aic_of_init(struct device_node *node, - struct device_node *parent) -{ - struct irq_chip_generic *gc; - struct irq_domain *domain; - - if (aic_domain) - return -EEXIST; - - domain = aic_common_of_init(node, &aic_irq_ops, "atmel-aic", - NR_AIC_IRQS, aic_irq_fixups); - if (IS_ERR(domain)) - return PTR_ERR(domain); - - aic_domain = domain; - gc = irq_get_domain_generic_chip(domain, 0); - - gc->chip_types[0].regs.eoi = AT91_AIC_EOICR; - gc->chip_types[0].regs.enable = AT91_AIC_IECR; - gc->chip_types[0].regs.disable = AT91_AIC_IDCR; - gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg; 
- gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg; - gc->chip_types[0].chip.irq_retrigger = aic_retrigger; - gc->chip_types[0].chip.irq_set_type = aic_set_type; - gc->chip_types[0].chip.irq_suspend = aic_suspend; - gc->chip_types[0].chip.irq_resume = aic_resume; - gc->chip_types[0].chip.irq_pm_shutdown = aic_pm_shutdown; - - aic_hw_init(domain); - set_handle_irq(aic_handle); - - return 0; -} -IRQCHIP_DECLARE(at91rm9200_aic, "atmel,at91rm9200-aic", aic_of_init); diff -ruN linux-org/drivers/irqchip/irq-bcm2835.c linux/drivers/irqchip/irq-bcm2835.c --- linux-org/drivers/irqchip/irq-bcm2835.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-bcm2835.c 2022-03-25 10:16:01.380859769 +0100 @@ -52,12 +52,9 @@ #include #include -#ifndef CONFIG_ARM64 -#include -#endif /* Put the bank and irq (32 bits) into the hwirq */ -#define MAKE_HWIRQ(b, n) (((b) << 5) | (n)) +#define MAKE_HWIRQ(b, n) ((b << 5) | (n)) #define HWIRQ_BANK(i) (i >> 5) #define HWIRQ_BIT(i) BIT(i & 0x1f) @@ -72,18 +69,10 @@ #define BANK0_VALID_MASK (BANK0_HWIRQ_MASK | BANK1_HWIRQ | BANK2_HWIRQ \ | SHORTCUT1_MASK | SHORTCUT2_MASK) -#undef ARM_LOCAL_GPU_INT_ROUTING -#define ARM_LOCAL_GPU_INT_ROUTING 0x0c - #define REG_FIQ_CONTROL 0x0c -#define REG_FIQ_ENABLE 0x80 -#define REG_FIQ_DISABLE 0 #define NR_BANKS 3 #define IRQS_PER_BANK 32 -#define NUMBER_IRQS MAKE_HWIRQ(NR_BANKS, 0) -#undef FIQ_START -#define FIQ_START (NR_IRQS_BANK0 + MAKE_HWIRQ(NR_BANKS - 1, 0)) static const int reg_pending[] __initconst = { 0x00, 0x04, 0x08 }; static const int reg_enable[] __initconst = { 0x18, 0x10, 0x14 }; @@ -101,7 +90,6 @@ void __iomem *enable[NR_BANKS]; void __iomem *disable[NR_BANKS]; struct irq_domain *domain; - void __iomem *local_base; }; static struct armctrl_ic intc __read_mostly; @@ -109,76 +97,20 @@ struct pt_regs *regs); static void bcm2836_chained_handle_irq(struct irq_desc *desc); -static inline unsigned int hwirq_to_fiq(unsigned long hwirq) -{ - hwirq -= NUMBER_IRQS; - /* - * The hwirq numbering used in this driver is: - * BASE (0-7) GPU1 (32-63) GPU2 (64-95). 
- * This differ from the one used in the FIQ register: - * GPU1 (0-31) GPU2 (32-63) BASE (64-71) - */ - if (hwirq >= 32) - return hwirq - 32; - - return hwirq + 64; -} - static void armctrl_mask_irq(struct irq_data *d) { - if (d->hwirq >= NUMBER_IRQS) - writel_relaxed(REG_FIQ_DISABLE, intc.base + REG_FIQ_CONTROL); - else - writel_relaxed(HWIRQ_BIT(d->hwirq), - intc.disable[HWIRQ_BANK(d->hwirq)]); + writel_relaxed(HWIRQ_BIT(d->hwirq), intc.disable[HWIRQ_BANK(d->hwirq)]); } static void armctrl_unmask_irq(struct irq_data *d) { - if (d->hwirq >= NUMBER_IRQS) { - if (num_online_cpus() > 1) { - unsigned int data; - - if (!intc.local_base) { - pr_err("FIQ is disabled due to missing arm_local_intc\n"); - return; - } - - data = readl_relaxed(intc.local_base + - ARM_LOCAL_GPU_INT_ROUTING); - - data &= ~0xc; - data |= (1 << 2); - writel_relaxed(data, - intc.local_base + - ARM_LOCAL_GPU_INT_ROUTING); - } - - writel_relaxed(REG_FIQ_ENABLE | hwirq_to_fiq(d->hwirq), - intc.base + REG_FIQ_CONTROL); - } else { - writel_relaxed(HWIRQ_BIT(d->hwirq), - intc.enable[HWIRQ_BANK(d->hwirq)]); - } -} - -#ifdef CONFIG_ARM64 -void bcm2836_arm_irqchip_spin_gpu_irq(void); - -static void armctrl_ack_irq(struct irq_data *d) -{ - bcm2836_arm_irqchip_spin_gpu_irq(); + writel_relaxed(HWIRQ_BIT(d->hwirq), intc.enable[HWIRQ_BANK(d->hwirq)]); } -#endif - static struct irq_chip armctrl_chip = { .name = "ARMCTRL-level", .irq_mask = armctrl_mask_irq, - .irq_unmask = armctrl_unmask_irq, -#ifdef CONFIG_ARM64 - .irq_ack = armctrl_ack_irq -#endif + .irq_unmask = armctrl_unmask_irq }; static int armctrl_xlate(struct irq_domain *d, struct device_node *ctrlr, @@ -217,9 +149,8 @@ if (!base) panic("%pOF: unable to map IC registers\n", node); - intc.base = base; - intc.domain = irq_domain_add_linear(node, NUMBER_IRQS * 2, - &armctrl_ops, NULL); + intc.domain = irq_domain_add_linear(node, MAKE_HWIRQ(NR_BANKS, 0), + &armctrl_ops, NULL); if (!intc.domain) panic("%pOF: unable to create IRQ domain\n", node); @@ -249,27 +180,6 @@ set_handle_irq(bcm2835_handle_irq); } - if (is_2836) { - extern void __iomem * __attribute__((weak)) arm_local_intc; - intc.local_base = arm_local_intc; - if (!intc.local_base) - pr_err("Failed to get local intc base. FIQ is disabled for cpus > 1\n"); - } - - /* Make a duplicate irq range which is used to enable FIQ */ - for (b = 0; b < NR_BANKS; b++) { - for (i = 0; i < bank_irqs[b]; i++) { - irq = irq_create_mapping(intc.domain, - MAKE_HWIRQ(b, i) + NUMBER_IRQS); - BUG_ON(irq <= 0); - irq_set_chip(irq, &armctrl_chip); - irq_set_probe(irq); - } - } -#ifndef CONFIG_ARM64 - init_FIQ(FIQ_START); -#endif - return 0; } diff -ruN linux-org/drivers/irqchip/irq-bcm2835.c.orig linux/drivers/irqchip/irq-bcm2835.c.orig --- linux-org/drivers/irqchip/irq-bcm2835.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux/drivers/irqchip/irq-bcm2835.c.orig 2022-03-25 10:14:32.457194722 +0100 @@ -0,0 +1,347 @@ +/* + * Copyright 2010 Broadcom + * Copyright 2012 Simon Arlott, Chris Boot, Stephen Warren + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Quirk 1: Shortcut interrupts don't set the bank 1/2 register pending bits + * + * If an interrupt fires on bank 1 that isn't in the shortcuts list, bit 8 + * on bank 0 is set to signify that an interrupt in bank 1 has fired, and + * to look in the bank 1 status register for more information. + * + * If an interrupt fires on bank 1 that _is_ in the shortcuts list, its + * shortcut bit in bank 0 is set as well as its interrupt bit in the bank 1 + * status register, but bank 0 bit 8 is _not_ set. + * + * Quirk 2: You can't mask the register 1/2 pending interrupts + * + * In a proper cascaded interrupt controller, the interrupt lines with + * cascaded interrupt controllers on them are just normal interrupt lines. + * You can mask the interrupts and get on with things. With this controller + * you can't do that. + * + * Quirk 3: The shortcut interrupts can't be (un)masked in bank 0 + * + * Those interrupts that have shortcuts can only be masked/unmasked in + * their respective banks' enable/disable registers. Doing so in the bank 0 + * enable/disable registers has no effect. + * + * The FIQ control register: + * Bits 0-6: IRQ (index in order of interrupts from banks 1, 2, then 0) + * Bit 7: Enable FIQ generation + * Bits 8+: Unused + * + * An interrupt must be disabled before configuring it for FIQ generation + * otherwise both handlers will fire at the same time! + */ + +#include +#include +#include +#include +#include +#include + +#include +#ifndef CONFIG_ARM64 +#include +#endif + +/* Put the bank and irq (32 bits) into the hwirq */ +#define MAKE_HWIRQ(b, n) (((b) << 5) | (n)) +#define HWIRQ_BANK(i) (i >> 5) +#define HWIRQ_BIT(i) BIT(i & 0x1f) + +#define NR_IRQS_BANK0 8 +#define BANK0_HWIRQ_MASK 0xff +/* Shortcuts can't be disabled so any unknown new ones need to be masked */ +#define SHORTCUT1_MASK 0x00007c00 +#define SHORTCUT2_MASK 0x001f8000 +#define SHORTCUT_SHIFT 10 +#define BANK1_HWIRQ BIT(8) +#define BANK2_HWIRQ BIT(9) +#define BANK0_VALID_MASK (BANK0_HWIRQ_MASK | BANK1_HWIRQ | BANK2_HWIRQ \ + | SHORTCUT1_MASK | SHORTCUT2_MASK) + +#undef ARM_LOCAL_GPU_INT_ROUTING +#define ARM_LOCAL_GPU_INT_ROUTING 0x0c + +#define REG_FIQ_CONTROL 0x0c +#define REG_FIQ_ENABLE 0x80 +#define REG_FIQ_DISABLE 0 + +#define NR_BANKS 3 +#define IRQS_PER_BANK 32 +#define NUMBER_IRQS MAKE_HWIRQ(NR_BANKS, 0) +#undef FIQ_START +#define FIQ_START (NR_IRQS_BANK0 + MAKE_HWIRQ(NR_BANKS - 1, 0)) + +static const int reg_pending[] __initconst = { 0x00, 0x04, 0x08 }; +static const int reg_enable[] __initconst = { 0x18, 0x10, 0x14 }; +static const int reg_disable[] __initconst = { 0x24, 0x1c, 0x20 }; +static const int bank_irqs[] __initconst = { 8, 32, 32 }; + +static const int shortcuts[] = { + 7, 9, 10, 18, 19, /* Bank 1 */ + 21, 22, 23, 24, 25, 30 /* Bank 2 */ +}; + +struct armctrl_ic { + void __iomem *base; + void __iomem *pending[NR_BANKS]; + void __iomem *enable[NR_BANKS]; + void __iomem *disable[NR_BANKS]; + struct irq_domain *domain; + void __iomem *local_base; +}; + +static struct armctrl_ic intc __read_mostly; +static void __exception_irq_entry bcm2835_handle_irq( + struct pt_regs *regs); +static void bcm2836_chained_handle_irq(struct irq_desc *desc); + +static inline unsigned int hwirq_to_fiq(unsigned long hwirq) +{ + hwirq -= NUMBER_IRQS; + /* + * The hwirq numbering used in this driver is: + * BASE (0-7) GPU1 (32-63) GPU2 (64-95). 
+ * This differ from the one used in the FIQ register: + * GPU1 (0-31) GPU2 (32-63) BASE (64-71) + */ + if (hwirq >= 32) + return hwirq - 32; + + return hwirq + 64; +} + +static void armctrl_mask_irq(struct irq_data *d) +{ + if (d->hwirq >= NUMBER_IRQS) + writel_relaxed(REG_FIQ_DISABLE, intc.base + REG_FIQ_CONTROL); + else + writel_relaxed(HWIRQ_BIT(d->hwirq), + intc.disable[HWIRQ_BANK(d->hwirq)]); +} + +static void armctrl_unmask_irq(struct irq_data *d) +{ + if (d->hwirq >= NUMBER_IRQS) { + if (num_online_cpus() > 1) { + unsigned int data; + + if (!intc.local_base) { + pr_err("FIQ is disabled due to missing arm_local_intc\n"); + return; + } + + data = readl_relaxed(intc.local_base + + ARM_LOCAL_GPU_INT_ROUTING); + + data &= ~0xc; + data |= (1 << 2); + writel_relaxed(data, + intc.local_base + + ARM_LOCAL_GPU_INT_ROUTING); + } + + writel_relaxed(REG_FIQ_ENABLE | hwirq_to_fiq(d->hwirq), + intc.base + REG_FIQ_CONTROL); + } else { + writel_relaxed(HWIRQ_BIT(d->hwirq), + intc.enable[HWIRQ_BANK(d->hwirq)]); + } +} + +#ifdef CONFIG_ARM64 +void bcm2836_arm_irqchip_spin_gpu_irq(void); + +static void armctrl_ack_irq(struct irq_data *d) +{ + bcm2836_arm_irqchip_spin_gpu_irq(); +} + +#endif + +static struct irq_chip armctrl_chip = { + .name = "ARMCTRL-level", + .irq_mask = armctrl_mask_irq, + .irq_unmask = armctrl_unmask_irq, +#ifdef CONFIG_ARM64 + .irq_ack = armctrl_ack_irq +#endif +}; + +static int armctrl_xlate(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (WARN_ON(intsize != 2)) + return -EINVAL; + + if (WARN_ON(intspec[0] >= NR_BANKS)) + return -EINVAL; + + if (WARN_ON(intspec[1] >= IRQS_PER_BANK)) + return -EINVAL; + + if (WARN_ON(intspec[0] == 0 && intspec[1] >= NR_IRQS_BANK0)) + return -EINVAL; + + *out_hwirq = MAKE_HWIRQ(intspec[0], intspec[1]); + *out_type = IRQ_TYPE_NONE; + return 0; +} + +static const struct irq_domain_ops armctrl_ops = { + .xlate = armctrl_xlate +}; + +static int __init armctrl_of_init(struct device_node *node, + struct device_node *parent, + bool is_2836) +{ + void __iomem *base; + int irq, b, i; + + base = of_iomap(node, 0); + if (!base) + panic("%pOF: unable to map IC registers\n", node); + + intc.base = base; + intc.domain = irq_domain_add_linear(node, NUMBER_IRQS * 2, + &armctrl_ops, NULL); + if (!intc.domain) + panic("%pOF: unable to create IRQ domain\n", node); + + for (b = 0; b < NR_BANKS; b++) { + intc.pending[b] = base + reg_pending[b]; + intc.enable[b] = base + reg_enable[b]; + intc.disable[b] = base + reg_disable[b]; + + for (i = 0; i < bank_irqs[b]; i++) { + irq = irq_create_mapping(intc.domain, MAKE_HWIRQ(b, i)); + BUG_ON(irq <= 0); + irq_set_chip_and_handler(irq, &armctrl_chip, + handle_level_irq); + irq_set_probe(irq); + } + } + + if (is_2836) { + int parent_irq = irq_of_parse_and_map(node, 0); + + if (!parent_irq) { + panic("%pOF: unable to get parent interrupt.\n", + node); + } + irq_set_chained_handler(parent_irq, bcm2836_chained_handle_irq); + } else { + set_handle_irq(bcm2835_handle_irq); + } + + if (is_2836) { + extern void __iomem * __attribute__((weak)) arm_local_intc; + intc.local_base = arm_local_intc; + if (!intc.local_base) + pr_err("Failed to get local intc base. 
FIQ is disabled for cpus > 1\n"); + } + + /* Make a duplicate irq range which is used to enable FIQ */ + for (b = 0; b < NR_BANKS; b++) { + for (i = 0; i < bank_irqs[b]; i++) { + irq = irq_create_mapping(intc.domain, + MAKE_HWIRQ(b, i) + NUMBER_IRQS); + BUG_ON(irq <= 0); + irq_set_chip(irq, &armctrl_chip); + irq_set_probe(irq); + } + } +#ifndef CONFIG_ARM64 + init_FIQ(FIQ_START); +#endif + + return 0; +} + +static int __init bcm2835_armctrl_of_init(struct device_node *node, + struct device_node *parent) +{ + return armctrl_of_init(node, parent, false); +} + +static int __init bcm2836_armctrl_of_init(struct device_node *node, + struct device_node *parent) +{ + return armctrl_of_init(node, parent, true); +} + + +/* + * Handle each interrupt across the entire interrupt controller. This reads the + * status register before handling each interrupt, which is necessary given that + * handle_IRQ may briefly re-enable interrupts for soft IRQ handling. + */ + +static u32 armctrl_translate_bank(int bank) +{ + u32 stat = readl_relaxed(intc.pending[bank]); + + return MAKE_HWIRQ(bank, ffs(stat) - 1); +} + +static u32 armctrl_translate_shortcut(int bank, u32 stat) +{ + return MAKE_HWIRQ(bank, shortcuts[ffs(stat >> SHORTCUT_SHIFT) - 1]); +} + +static u32 get_next_armctrl_hwirq(void) +{ + u32 stat = readl_relaxed(intc.pending[0]) & BANK0_VALID_MASK; + + if (stat == 0) + return ~0; + else if (stat & BANK0_HWIRQ_MASK) + return MAKE_HWIRQ(0, ffs(stat & BANK0_HWIRQ_MASK) - 1); + else if (stat & SHORTCUT1_MASK) + return armctrl_translate_shortcut(1, stat & SHORTCUT1_MASK); + else if (stat & SHORTCUT2_MASK) + return armctrl_translate_shortcut(2, stat & SHORTCUT2_MASK); + else if (stat & BANK1_HWIRQ) + return armctrl_translate_bank(1); + else if (stat & BANK2_HWIRQ) + return armctrl_translate_bank(2); + else + BUG(); +} + +static void __exception_irq_entry bcm2835_handle_irq( + struct pt_regs *regs) +{ + u32 hwirq; + + while ((hwirq = get_next_armctrl_hwirq()) != ~0) + handle_domain_irq(intc.domain, hwirq, regs); +} + +static void bcm2836_chained_handle_irq(struct irq_desc *desc) +{ + u32 hwirq; + + while ((hwirq = get_next_armctrl_hwirq()) != ~0) + generic_handle_irq(irq_linear_revmap(intc.domain, hwirq)); +} + +IRQCHIP_DECLARE(bcm2835_armctrl_ic, "brcm,bcm2835-armctrl-ic", + bcm2835_armctrl_of_init); +IRQCHIP_DECLARE(bcm2836_armctrl_ic, "brcm,bcm2836-armctrl-ic", + bcm2836_armctrl_of_init); diff -ruN linux-org/drivers/irqchip/irq-bcm2836.c linux/drivers/irqchip/irq-bcm2836.c --- linux-org/drivers/irqchip/irq-bcm2836.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-bcm2836.c 2022-03-25 10:16:01.380859769 +0100 @@ -19,10 +19,63 @@ #include #include #include -#include - #include +#define LOCAL_CONTROL 0x000 +#define LOCAL_PRESCALER 0x008 + +/* + * The low 2 bits identify the CPU that the GPU IRQ goes to, and the + * next 2 bits identify the CPU that the GPU FIQ goes to. + */ +#define LOCAL_GPU_ROUTING 0x00c +/* When setting bits 0-3, enables PMU interrupts on that CPU. */ +#define LOCAL_PM_ROUTING_SET 0x010 +/* When setting bits 0-3, disables PMU interrupts on that CPU. */ +#define LOCAL_PM_ROUTING_CLR 0x014 +/* + * The low 4 bits of this are the CPU's timer IRQ enables, and the + * next 4 bits are the CPU's timer FIQ enables (which override the IRQ + * bits). + */ +#define LOCAL_TIMER_INT_CONTROL0 0x040 +/* + * The low 4 bits of this are the CPU's per-mailbox IRQ enables, and + * the next 4 bits are the CPU's per-mailbox FIQ enables (which + * override the IRQ bits). 
+ */ +#define LOCAL_MAILBOX_INT_CONTROL0 0x050 +/* + * The CPU's interrupt status register. Bits are defined by the the + * LOCAL_IRQ_* bits below. + */ +#define LOCAL_IRQ_PENDING0 0x060 +/* Same status bits as above, but for FIQ. */ +#define LOCAL_FIQ_PENDING0 0x070 +/* + * Mailbox write-to-set bits. There are 16 mailboxes, 4 per CPU, and + * these bits are organized by mailbox number and then CPU number. We + * use mailbox 0 for IPIs. The mailbox's interrupt is raised while + * any bit is set. + */ +#define LOCAL_MAILBOX0_SET0 0x080 +#define LOCAL_MAILBOX3_SET0 0x08c +/* Mailbox write-to-clear bits. */ +#define LOCAL_MAILBOX0_CLR0 0x0c0 +#define LOCAL_MAILBOX3_CLR0 0x0cc + +#define LOCAL_IRQ_CNTPSIRQ 0 +#define LOCAL_IRQ_CNTPNSIRQ 1 +#define LOCAL_IRQ_CNTHPIRQ 2 +#define LOCAL_IRQ_CNTVIRQ 3 +#define LOCAL_IRQ_MAILBOX0 4 +#define LOCAL_IRQ_MAILBOX1 5 +#define LOCAL_IRQ_MAILBOX2 6 +#define LOCAL_IRQ_MAILBOX3 7 +#define LOCAL_IRQ_GPU_FAST 8 +#define LOCAL_IRQ_PMU_FAST 9 +#define LAST_IRQ LOCAL_IRQ_PMU_FAST + struct bcm2836_arm_irqchip_intc { struct irq_domain *domain; void __iomem *base; @@ -30,9 +83,6 @@ static struct bcm2836_arm_irqchip_intc intc __read_mostly; -void __iomem *arm_local_intc; -EXPORT_SYMBOL_GPL(arm_local_intc); - static void bcm2836_arm_irqchip_mask_per_cpu_irq(unsigned int reg_offset, unsigned int bit, int cpu) @@ -95,27 +145,6 @@ { } -#ifdef CONFIG_ARM64 - -void bcm2836_arm_irqchip_spin_gpu_irq(void) -{ - u32 i; - void __iomem *gpurouting = (intc.base + LOCAL_GPU_ROUTING); - u32 routing_val = readl(gpurouting); - - for (i = 1; i <= 3; i++) { - u32 new_routing_val = (routing_val + i) & 3; - - if (cpu_active(new_routing_val)) { - writel(new_routing_val, gpurouting); - return; - } - } -} -EXPORT_SYMBOL(bcm2836_arm_irqchip_spin_gpu_irq); - -#endif - static struct irq_chip bcm2836_arm_irqchip_gpu = { .name = "bcm2836-gpu", .irq_mask = bcm2836_arm_irqchip_mask_gpu_irq, @@ -128,7 +157,7 @@ irq_set_percpu_devid(irq); irq_set_chip_and_handler(irq, chip, handle_percpu_devid_irq); - irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_TYPE_LEVEL_LOW); + irq_set_status_flags(irq, IRQ_NOAUTOEN); } static void @@ -146,7 +175,6 @@ u32 ipi = ffs(mbox_val) - 1; writel(1 << ipi, mailbox0); - dsb(sy); handle_IPI(ipi, regs); #endif } else if (stat) { @@ -187,6 +215,24 @@ cpu); return 0; } + +#ifdef CONFIG_ARM +static int __init bcm2836_smp_boot_secondary(unsigned int cpu, + struct task_struct *idle) +{ + unsigned long secondary_startup_phys = + (unsigned long)virt_to_phys((void *)secondary_startup); + + writel(secondary_startup_phys, + intc.base + LOCAL_MAILBOX3_SET0 + 16 * cpu); + + return 0; +} + +static const struct smp_operations bcm2836_smp_ops __initconst = { + .smp_boot_secondary = bcm2836_smp_boot_secondary, +}; +#endif #endif static const struct irq_domain_ops bcm2836_arm_irqchip_intc_ops = { @@ -203,6 +249,10 @@ bcm2836_cpu_dying); set_smp_cross_call(bcm2836_arm_irqchip_send_ipi); + +#ifdef CONFIG_ARM + smp_set_ops(&bcm2836_smp_ops); +#endif #endif } @@ -235,8 +285,6 @@ panic("%pOF: unable to map local interrupt registers\n", node); } - arm_local_intc = intc.base; - bcm2835_init_local_timer_frequency(); intc.domain = irq_domain_add_linear(node, LAST_IRQ + 1,
diff -ruN linux-org/drivers/irqchip/irq-bcm7120-l2.c linux/drivers/irqchip/irq-bcm7120-l2.c --- linux-org/drivers/irqchip/irq-bcm7120-l2.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-bcm7120-l2.c 2022-03-25 10:15:23.349003026 +0100 @@ -61,6 +61,7 @@ struct bcm7120_l2_intc_data *b = data->b; struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int idx; + unsigned long flags; chained_irq_enter(chip, desc); @@ -71,11 +72,11 @@ unsigned long pending; int hwirq; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); pending = irq_reg_readl(gc, b->stat_offset[idx]) & gc->mask_cache & data->irq_map_mask[idx]; - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) { generic_handle_irq(irq_find_mapping(b->domain, @@ -90,22 +91,24 @@ { struct bcm7120_l2_intc_data *b = gc->private; struct irq_chip_type *ct = gc->chip_types; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); if (b->can_wake) irq_reg_writel(gc, gc->mask_cache | gc->wake_active, ct->regs.mask); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } static void bcm7120_l2_intc_resume(struct irq_chip_generic *gc) { struct irq_chip_type *ct = gc->chip_types; + unsigned long flags; /* Restore the saved mask */ - irq_gc_lock(gc); + flags = irq_gc_lock(gc); irq_reg_writel(gc, gc->mask_cache, ct->regs.mask); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } static int bcm7120_l2_intc_init_one(struct device_node *dn, diff -ruN linux-org/drivers/irqchip/irq-brcmstb-l2.c linux/drivers/irqchip/irq-brcmstb-l2.c --- linux-org/drivers/irqchip/irq-brcmstb-l2.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-brcmstb-l2.c 2022-03-25 10:15:23.349003026 +0100 @@ -83,8 +83,9 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct brcmstb_l2_intc_data *b = gc->private; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); /* Save the current mask */ b->saved_mask = irq_reg_readl(gc, CPU_MASK_STATUS); @@ -93,22 +94,23 @@ irq_reg_writel(gc, ~gc->wake_active, CPU_MASK_SET); irq_reg_writel(gc, gc->wake_active, CPU_MASK_CLEAR); } - irq_gc_unlock(gc); + 
irq_gc_unlock(gc, flags); } static void brcmstb_l2_intc_resume(struct irq_data *d) { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct brcmstb_l2_intc_data *b = gc->private; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); /* Clear unmasked non-wakeup interrupts */ irq_reg_writel(gc, ~b->saved_mask & ~gc->wake_active, CPU_CLEAR); /* Restore the saved mask */ irq_reg_writel(gc, b->saved_mask, CPU_MASK_SET); irq_reg_writel(gc, ~b->saved_mask, CPU_MASK_CLEAR); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } static int __init brcmstb_l2_intc_of_init(struct device_node *np, diff -ruN linux-org/drivers/irqchip/irq-crossbar.c linux/drivers/irqchip/irq-crossbar.c --- linux-org/drivers/irqchip/irq-crossbar.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-crossbar.c 2022-03-25 10:15:23.349003026 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #define IRQ_FREE -1 #define IRQ_RESERVED -2 @@ -69,10 +70,15 @@ .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_type = irq_chip_set_type_parent, .flags = IRQCHIP_MASK_ON_SUSPEND | - IRQCHIP_SKIP_SET_WAKE, + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_PIPELINE_SAFE, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif +#ifdef CONFIG_IPIPE + .irq_hold = irq_chip_hold_parent, + .irq_release = irq_chip_release_parent, +#endif }; static int allocate_gic_irq(struct irq_domain *domain, unsigned virq, diff -ruN linux-org/drivers/irqchip/irq-dw-apb-ictl.c linux/drivers/irqchip/irq-dw-apb-ictl.c --- linux-org/drivers/irqchip/irq-dw-apb-ictl.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-dw-apb-ictl.c 2022-03-25 10:15:23.349003026 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #define APB_INT_ENABLE_L 0x00 #define APB_INT_ENABLE_H 0x04 @@ -42,7 +43,7 @@ u32 hwirq = ffs(stat) - 1; u32 virq = irq_find_mapping(d, gc->irq_base + hwirq); - generic_handle_irq(virq); + ipipe_handle_demuxed_irq(virq); stat &= ~(1 << hwirq); } } @@ -55,11 +56,12 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); writel_relaxed(~0, gc->reg_base + ct->regs.enable); writel_relaxed(*ct->mask_cache, gc->reg_base + ct->regs.mask); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } #else #define dw_apb_ictl_resume NULL @@ -144,6 +146,7 @@ gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit; gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit; gc->chip_types[0].chip.irq_resume = dw_apb_ictl_resume; + gc->chip_types[0].chip.flags |= IRQCHIP_PIPELINE_SAFE; } irq_set_chained_handler_and_data(irq, dw_apb_ictl_handler, domain); diff -ruN linux-org/drivers/irqchip/irq-gic.c linux/drivers/irqchip/irq-gic.c --- linux-org/drivers/irqchip/irq-gic.c 2022-03-25 09:55:38.965464511 +0100 +++ linux/drivers/irqchip/irq-gic.c 2022-03-25 10:15:23.349003026 +0100 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -91,9 +92,17 @@ #endif }; +#ifdef CONFIG_IPIPE +#define pipeline_lock(__flags) do { (__flags) = hard_local_irq_save(); } while (0) +#define pipeline_unlock(__flags) hard_local_irq_restore(__flags) +#else +#define pipeline_lock(__flags) do { (void)__flags; } while (0) +#define pipeline_unlock(__flags) do { (void)__flags; } while (0) +#endif + #ifdef CONFIG_BL_SWITCHER -static DEFINE_RAW_SPINLOCK(cpu_map_lock); +static IPIPE_DEFINE_RAW_SPINLOCK(cpu_map_lock); #define gic_lock_irqsave(f) \ 
raw_spin_lock_irqsave(&cpu_map_lock, (f)) @@ -204,7 +213,12 @@ static void gic_mask_irq(struct irq_data *d) { + unsigned long flags; + + pipeline_lock(flags); + ipipe_lock_irq(d->irq); gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR); + pipeline_unlock(flags); } static void gic_eoimode1_mask_irq(struct irq_data *d) @@ -224,7 +238,12 @@ static void gic_unmask_irq(struct irq_data *d) { + unsigned long flags; + + pipeline_lock(flags); gic_poke_irq(d, GIC_DIST_ENABLE_SET); + ipipe_unlock_irq(d->irq); + pipeline_unlock(flags); } static void gic_eoi_irq(struct irq_data *d) @@ -241,6 +260,27 @@ writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE); } +#ifdef CONFIG_IPIPE +static void gic_hold_irq(struct irq_data *d) +{ + struct irq_chip *chip = irq_data_get_irq_chip(d); + + gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR); + + if (chip->irq_eoi == gic_eoimode1_eoi_irq) { + if (irqd_is_forwarded_to_vcpu(d)) + gic_poke_irq(d, GIC_DIST_ACTIVE_CLEAR); + gic_eoimode1_eoi_irq(d); + } else + gic_eoi_irq(d); +} + +static void gic_release_irq(struct irq_data *d) +{ + gic_poke_irq(d, GIC_DIST_ENABLE_SET); +} +#endif /* CONFIG_IPIPE */ + static int gic_irq_set_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool val) { @@ -364,7 +404,7 @@ if (static_key_true(&supports_deactivate)) writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI); isb(); - handle_domain_irq(gic->domain, irqnr, regs); + ipipe_handle_domain_irq(gic->domain, irqnr, regs); continue; } if (irqnr < 16) { @@ -380,7 +420,7 @@ * Pairs with the write barrier in gic_raise_softirq */ smp_rmb(); - handle_IPI(irqnr, regs); + ipipe_handle_multi_ipi(irqnr, regs); #endif continue; } @@ -408,7 +448,7 @@ handle_bad_irq(desc); } else { isb(); - generic_handle_irq(cascade_irq); + ipipe_handle_demuxed_irq(cascade_irq); } out: @@ -420,11 +460,16 @@ .irq_unmask = gic_unmask_irq, .irq_eoi = gic_eoi_irq, .irq_set_type = gic_set_type, +#ifdef CONFIG_IPIPE + .irq_hold = gic_hold_irq, + .irq_release = gic_release_irq, +#endif .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_MASK_ON_SUSPEND, + IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_PIPELINE_SAFE, }; void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) @@ -471,7 +516,6 @@ writel_relaxed(bypass | mode | GICC_ENABLE, cpu_base + GIC_CPU_CTRL); } - static void gic_dist_init(struct gic_chip_data *gic) { unsigned int i; diff -ruN linux-org/drivers/irqchip/irq-gic-v3.c linux/drivers/irqchip/irq-gic-v3.c --- linux-org/drivers/irqchip/irq-gic-v3.c 2022-03-25 09:55:38.961464526 +0100 +++ linux/drivers/irqchip/irq-gic-v3.c 2022-03-25 10:15:23.349003026 +0100 @@ -200,7 +200,12 @@ static void gic_mask_irq(struct irq_data *d) { + unsigned long flags; + + flags = hard_cond_local_irq_save(); + ipipe_lock_irq(d->irq); gic_poke_irq(d, GICD_ICENABLER); + hard_cond_local_irq_restore(flags); } static void gic_eoimode1_mask_irq(struct irq_data *d) @@ -220,7 +225,12 @@ static void gic_unmask_irq(struct irq_data *d) { + unsigned long flags; + + flags = hard_cond_local_irq_save(); gic_poke_irq(d, GICD_ISENABLER); + ipipe_unlock_irq(d->irq); + hard_cond_local_irq_restore(flags); } static int gic_irq_set_irqchip_state(struct irq_data *d, @@ -294,6 +304,27 @@ gic_write_dir(gic_irq(d)); } +#ifdef CONFIG_IPIPE +static void gic_hold_irq(struct irq_data *d) +{ + struct irq_chip *chip = irq_data_get_irq_chip(d); + + gic_poke_irq(d, GICD_ICENABLER); + + if (chip->irq_eoi == gic_eoimode1_eoi_irq) { + if 
(irqd_is_forwarded_to_vcpu(d)) + gic_poke_irq(d, GICD_ICACTIVER); + gic_eoimode1_eoi_irq(d); + } else + gic_eoi_irq(d); +} + +static void gic_release_irq(struct irq_data *d) +{ + gic_poke_irq(d, GICD_ISENABLER); +} +#endif /* CONFIG_IPIPE */ + static int gic_set_type(struct irq_data *d, unsigned int type) { unsigned int irq = gic_irq(d); @@ -356,7 +387,7 @@ else isb(); - err = handle_domain_irq(gic_data.domain, irqnr, regs); + err = ipipe_handle_domain_irq(gic_data.domain, irqnr, regs); if (err) { WARN_ONCE(true, "Unexpected interrupt received!\n"); if (static_key_true(&supports_deactivate)) { @@ -380,7 +411,7 @@ * that any shared data read by handle_IPI will * be read after the ACK. */ - handle_IPI(irqnr, regs); + ipipe_handle_multi_ipi(irqnr, regs); #else WARN_ONCE(true, "Unexpected SGI received!\n"); #endif @@ -769,10 +800,14 @@ .irq_unmask = gic_unmask_irq, .irq_eoi = gic_eoi_irq, .irq_set_type = gic_set_type, +#ifdef CONFIG_IPIPE + .irq_hold = gic_hold_irq, + .irq_release = gic_release_irq, +#endif .irq_set_affinity = gic_set_affinity, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, - .flags = IRQCHIP_SET_TYPE_MASKED, + .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_PIPELINE_SAFE, }; static struct irq_chip gic_eoimode1_chip = { @@ -781,11 +816,15 @@ .irq_unmask = gic_unmask_irq, .irq_eoi = gic_eoimode1_eoi_irq, .irq_set_type = gic_set_type, +#ifdef CONFIG_IPIPE + .irq_hold = gic_hold_irq, + .irq_release = gic_release_irq, +#endif .irq_set_affinity = gic_set_affinity, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, - .flags = IRQCHIP_SET_TYPE_MASKED, + .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_PIPELINE_SAFE, }; #define GIC_ID_NR (1U << gic_data.rdists.id_bits) diff -ruN linux-org/drivers/irqchip/irq-imx-gpcv2.c linux/drivers/irqchip/irq-imx-gpcv2.c --- linux-org/drivers/irqchip/irq-imx-gpcv2.c 2022-03-25 09:55:38.965464511 +0100 +++ linux/drivers/irqchip/irq-imx-gpcv2.c 2022-03-25 10:15:23.349003026 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include #define IMR_NUM 4 @@ -19,7 +20,11 @@ #define GPC_IMR1_CORE1 0x40 struct gpcv2_irqchip_data { +#ifdef CONFIG_IPIPE + ipipe_spinlock_t rlock; +#else struct raw_spinlock rlock; +#endif void __iomem *gpc_base; u32 wakeup_sources[IMR_NUM]; u32 saved_irq_mask[IMR_NUM]; @@ -45,6 +50,7 @@ static int gpcv2_wakeup_source_save(void) { struct gpcv2_irqchip_data *cd; + unsigned long flags; void __iomem *reg; int i; @@ -54,8 +60,10 @@ for (i = 0; i < IMR_NUM; i++) { reg = cd->gpc_base + cd->cpu2wakeup + i * 4; + flags = hard_cond_local_irq_save(); cd->saved_irq_mask[i] = readl_relaxed(reg); writel_relaxed(cd->wakeup_sources[i], reg); + hard_cond_local_irq_restore(flags); } return 0; @@ -64,6 +72,7 @@ static void gpcv2_wakeup_source_restore(void) { struct gpcv2_irqchip_data *cd; + unsigned long flags; void __iomem *reg; int i; @@ -72,8 +81,10 @@ return; for (i = 0; i < IMR_NUM; i++) { + flags = hard_cond_local_irq_save(); reg = cd->gpc_base + cd->cpu2wakeup + i * 4; writel_relaxed(cd->saved_irq_mask[i], reg); + hard_cond_local_irq_restore(flags); } } @@ -106,38 +117,79 @@ return 0; } -static void imx_gpcv2_irq_unmask(struct irq_data *d) +static void __imx_gpcv2_irq_unmask(struct irq_data *d) { struct gpcv2_irqchip_data *cd = d->chip_data; void __iomem *reg; u32 val; - raw_spin_lock(&cd->rlock); reg = cd->gpc_base + cd->cpu2wakeup + d->hwirq / 32 * 4; val = readl_relaxed(reg); 
val &= ~(1 << d->hwirq % 32); writel_relaxed(val, reg); - raw_spin_unlock(&cd->rlock); +} +static void imx_gpcv2_irq_unmask(struct irq_data *d) +{ + struct gpcv2_irqchip_data *cd = d->chip_data; + unsigned long flags; + + raw_spin_lock_irqsave(&cd->rlock, flags); + __imx_gpcv2_irq_unmask(d); + __ipipe_spin_unlock_irqbegin(&cd->rlock); irq_chip_unmask_parent(d); + __ipipe_spin_unlock_irqcomplete(flags); } -static void imx_gpcv2_irq_mask(struct irq_data *d) +static void __imx_gpcv2_irq_mask(struct irq_data *d) { struct gpcv2_irqchip_data *cd = d->chip_data; void __iomem *reg; u32 val; - raw_spin_lock(&cd->rlock); reg = cd->gpc_base + cd->cpu2wakeup + d->hwirq / 32 * 4; val = readl_relaxed(reg); val |= 1 << (d->hwirq % 32); writel_relaxed(val, reg); - raw_spin_unlock(&cd->rlock); +} +static void imx_gpcv2_irq_mask(struct irq_data *d) +{ + struct gpcv2_irqchip_data *cd = d->chip_data; + unsigned long flags; + + raw_spin_lock_irqsave(&cd->rlock, flags); + __imx_gpcv2_irq_mask(d); + __ipipe_spin_unlock_irqbegin(&cd->rlock); irq_chip_mask_parent(d); + __ipipe_spin_unlock_irqcomplete(flags); +} + +#ifdef CONFIG_IPIPE + +static void imx_gpc_hold_irq(struct irq_data *d) +{ + struct gpcv2_irqchip_data *cd = d->chip_data; + + raw_spin_lock(&cd->rlock); + __imx_gpcv2_irq_mask(d); + raw_spin_unlock(&cd->rlock); + irq_chip_hold_parent(d); } +static void imx_gpc_release_irq(struct irq_data *d) +{ + struct gpcv2_irqchip_data *cd = d->chip_data; + unsigned long flags; + + raw_spin_lock_irqsave(&cd->rlock, flags); + __imx_gpcv2_irq_unmask(d); + raw_spin_unlock_irqrestore(&cd->rlock, flags); + irq_chip_release_parent(d); +} + +#endif /* CONFIG_IPIPE */ + static struct irq_chip gpcv2_irqchip_data_chip = { .name = "GPCv2", .irq_eoi = irq_chip_eoi_parent, @@ -148,6 +200,11 @@ #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif +#ifdef CONFIG_IPIPE + .irq_hold = imx_gpc_hold_irq, + .irq_release = imx_gpc_release_irq, +#endif + .flags = IRQCHIP_PIPELINE_SAFE, }; static int imx_gpcv2_domain_translate(struct irq_domain *d, diff -ruN linux-org/drivers/irqchip/irq-omap-intc.c linux/drivers/irqchip/irq-omap-intc.c --- linux-org/drivers/irqchip/irq-omap-intc.c 2022-03-25 09:55:38.965464511 +0100 +++ linux/drivers/irqchip/irq-omap-intc.c 2022-03-25 10:15:23.349003026 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ #define INTC_MIR_CLEAR0 0x0088 #define INTC_MIR_SET0 0x008c #define INTC_PENDING_IRQ0 0x0098 +#define INTC_PRIO 0x0100 #define INTC_PENDING_IRQ1 0x00b8 #define INTC_PENDING_IRQ2 0x00d8 #define INTC_PENDING_IRQ3 0x00f8 @@ -53,6 +55,12 @@ #define INTCPS_NR_ILR_REGS 128 #define INTCPS_NR_MIR_REGS 4 +#if !defined(MULTI_OMAP1) && !defined(MULTI_OMAP2) +#define inline_single inline +#else +#define inline_single +#endif + #define INTC_IDLE_FUNCIDLE (1 << 0) #define INTC_IDLE_TURBO (1 << 1) @@ -73,12 +81,12 @@ static int omap_nr_pending = 3; static int omap_nr_irqs = 96; -static void intc_writel(u32 reg, u32 val) +static inline_single void intc_writel(u32 reg, u32 val) { writel_relaxed(val, omap_irq_base + reg); } -static u32 intc_readl(u32 reg) +static inline_single u32 intc_readl(u32 reg) { return readl_relaxed(omap_irq_base + reg); } @@ -141,9 +149,10 @@ } /* XXX: FIQ and additional INTC support (only MPU at the moment) */ -static void omap_ack_irq(struct irq_data *d) +static inline_single void omap_ack_irq(struct irq_data *d) { intc_writel(INTC_CONTROL, 0x1); + dsb(); } static void omap_mask_ack_irq(struct irq_data *d) @@ -168,8 +177,14 @@ 
while (!(intc_readl(INTC_SYSSTATUS) & 0x1)) /* Wait for reset to complete */; +#ifndef CONFIG_IPIPE /* Enable autoidle */ intc_writel(INTC_SYSCONFIG, 1 << 0); +#else /* CONFIG_IPIPE */ + /* Disable autoidle */ + intc_writel(INTC_SYSCONFIG, 0); + intc_writel(INTC_IDLE, 0x1); +#endif /* CONFIG_IPIPE */ } int omap_irq_pending(void) @@ -215,7 +230,7 @@ ct->chip.irq_mask = irq_gc_mask_disable_reg; ct->chip.irq_unmask = irq_gc_unmask_enable_reg; - ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE; + ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE; ct->regs.enable = INTC_MIR_CLEAR0 + 32 * i; ct->regs.disable = INTC_MIR_SET0 + 32 * i; @@ -235,8 +250,11 @@ ct = gc->chip_types; ct->chip.irq_ack = omap_mask_ack_irq; ct->chip.irq_mask = irq_gc_mask_disable_reg; +#ifdef CONFIG_IPIPE + ct->chip.irq_mask_ack = omap_mask_ack_irq; +#endif ct->chip.irq_unmask = irq_gc_unmask_enable_reg; - ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE; + ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE; ct->regs.enable = INTC_MIR_CLEAR0; ct->regs.disable = INTC_MIR_SET0; @@ -361,7 +379,7 @@ } irqnr &= ACTIVEIRQ_MASK; - handle_domain_irq(domain, irqnr, regs); + ipipe_handle_domain_irq(domain, irqnr, regs); } void __init omap3_init_irq(void) @@ -399,6 +417,28 @@ return 0; } +#if defined(CONFIG_IPIPE) && defined(CONFIG_ARCH_OMAP2PLUS) +#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) +void omap3_intc_mute(void) +{ + intc_writel(INTC_THRESHOLD, 0x1); + intc_writel(INTC_CONTROL, 0x1); +} + +void omap3_intc_unmute(void) +{ + intc_writel(INTC_THRESHOLD, 0xff); +} + +void omap3_intc_set_irq_prio(int irq, int hi) +{ + if (irq >= INTCPS_NR_MIR_REGS * 32) + return; + intc_writel(INTC_PRIO + 4 * irq, hi ? 0 : 0xfc); +} +#endif /* CONFIG_ARCH_OMAP3 */ +#endif /* CONFIG_IPIPE && ARCH_OMAP2PLUS */ + IRQCHIP_DECLARE(omap2_intc, "ti,omap2-intc", intc_of_init); IRQCHIP_DECLARE(omap3_intc, "ti,omap3-intc", intc_of_init); IRQCHIP_DECLARE(dm814x_intc, "ti,dm814-intc", intc_of_init); diff -ruN linux-org/drivers/irqchip/irq-sunxi-nmi.c linux/drivers/irqchip/irq-sunxi-nmi.c --- linux-org/drivers/irqchip/irq-sunxi-nmi.c 2022-03-25 09:55:38.969464496 +0100 +++ linux/drivers/irqchip/irq-sunxi-nmi.c 2022-03-25 10:15:23.349003026 +0100 @@ -115,8 +115,9 @@ u32 ctrl_off = ct->regs.type; unsigned int src_type; unsigned int i; + unsigned long flags; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); switch (flow_type & IRQF_TRIGGER_MASK) { case IRQ_TYPE_EDGE_FALLING: @@ -133,7 +134,7 @@ src_type = SUNXI_SRC_TYPE_LEVEL_LOW; break; default: - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); pr_err("Cannot assign multiple trigger modes to IRQ %d.\n", data->irq); return -EBADR; @@ -151,7 +152,7 @@ src_type_reg |= src_type; sunxi_sc_nmi_write(gc, ctrl_off, src_type_reg); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); return IRQ_SET_MASK_OK; } @@ -200,7 +201,7 @@ gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; gc->chip_types[0].chip.irq_eoi = irq_gc_ack_set_bit; gc->chip_types[0].chip.irq_set_type = sunxi_sc_nmi_set_type; - gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED; + gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | IRQCHIP_PIPELINE_SAFE; gc->chip_types[0].regs.ack = reg_offs->pend; gc->chip_types[0].regs.mask = reg_offs->enable; gc->chip_types[0].regs.type = reg_offs->ctrl; @@ -211,6 +212,7 @@ gc->chip_types[1].chip.irq_mask = irq_gc_mask_clr_bit; gc->chip_types[1].chip.irq_unmask = irq_gc_mask_set_bit; gc->chip_types[1].chip.irq_set_type = sunxi_sc_nmi_set_type; + 
gc->chip_types[1].chip.flags = IRQCHIP_PIPELINE_SAFE; gc->chip_types[1].regs.ack = reg_offs->pend; gc->chip_types[1].regs.mask = reg_offs->enable; gc->chip_types[1].regs.type = reg_offs->ctrl; diff -ruN linux-org/drivers/irqchip/irq-versatile-fpga.c linux/drivers/irqchip/irq-versatile-fpga.c --- linux-org/drivers/irqchip/irq-versatile-fpga.c 2022-03-25 09:55:38.969464496 +0100 +++ linux/drivers/irqchip/irq-versatile-fpga.c 2022-03-25 10:15:23.349003026 +0100 @@ -80,7 +80,7 @@ unsigned int irq = ffs(status) - 1; status &= ~(1 << irq); - generic_handle_irq(irq_find_mapping(f->domain, irq)); + ipipe_handle_demuxed_irq(irq_find_mapping(f->domain, irq)); } while (status); } @@ -97,7 +97,7 @@ while ((status = readl(f->base + IRQ_STATUS))) { irq = ffs(status) - 1; - handle_domain_irq(f->domain, irq, regs); + ipipe_handle_domain_irq(f->domain, irq, regs); handled = 1; } @@ -153,7 +153,11 @@ f->chip.name = name; f->chip.irq_ack = fpga_irq_mask; f->chip.irq_mask = fpga_irq_mask; +#ifdef CONFIG_IPIPE + f->chip.irq_mask_ack = fpga_irq_mask; +#endif f->chip.irq_unmask = fpga_irq_unmask; + f->chip.flags = IRQCHIP_PIPELINE_SAFE; f->valid = valid; if (parent_irq != -1) { diff -ruN linux-org/drivers/irqchip/irq-vic.c linux/drivers/irqchip/irq-vic.c --- linux-org/drivers/irqchip/irq-vic.c 2022-03-25 09:55:38.969464496 +0100 +++ linux/drivers/irqchip/irq-vic.c 2022-03-25 10:15:23.349003026 +0100 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -218,7 +219,7 @@ while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) { irq = ffs(stat) - 1; - handle_domain_irq(vic->domain, irq, regs); + ipipe_handle_domain_irq(vic->domain, irq, regs); handled = 1; } @@ -235,7 +236,7 @@ while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) { hwirq = ffs(stat) - 1; - generic_handle_irq(irq_find_mapping(vic->domain, hwirq)); + ipipe_handle_demuxed_irq(irq_find_mapping(vic->domain, hwirq)); } chained_irq_exit(host_chip, desc); @@ -339,7 +340,7 @@ #if defined(CONFIG_PM) static struct vic_device *vic_from_irq(unsigned int irq) { - struct vic_device *v = vic_devices; + struct vic_device *v = vic_devices; unsigned int base_irq = irq & ~31; int id; @@ -378,8 +379,12 @@ .name = "VIC", .irq_ack = vic_ack_irq, .irq_mask = vic_mask_irq, +#ifdef CONFIG_IPIPE + .irq_mask_ack = vic_ack_irq, +#endif /* CONFIG_IPIPE */ .irq_unmask = vic_unmask_irq, .irq_set_wake = vic_set_wake, + .flags = IRQCHIP_PIPELINE_SAFE, }; static void __init vic_disable(void __iomem *base) diff -ruN linux-org/drivers/memory/omap-gpmc.c linux/drivers/memory/omap-gpmc.c --- linux-org/drivers/memory/omap-gpmc.c 2022-03-25 09:55:39.373462974 +0100 +++ linux/drivers/memory/omap-gpmc.c 2022-03-25 10:15:23.349003026 +0100 @@ -1161,12 +1161,15 @@ static int gpmc_irq_endis(unsigned long hwirq, bool endis) { + unsigned long flags; u32 regval; /* bits GPMC_NR_NAND_IRQS to 8 are reserved */ if (hwirq >= GPMC_NR_NAND_IRQS) hwirq += 8 - GPMC_NR_NAND_IRQS; + flags = hard_local_irq_save(); + regval = gpmc_read_reg(GPMC_IRQENABLE); if (endis) regval |= BIT(hwirq); @@ -1174,6 +1177,8 @@ regval &= ~BIT(hwirq); gpmc_write_reg(GPMC_IRQENABLE, regval); + hard_local_irq_restore(flags); + return 0; } @@ -1199,6 +1204,7 @@ static void gpmc_irq_edge_config(unsigned long hwirq, bool rising_edge) { + unsigned long flags; u32 regval; /* NAND IRQs polarity is not configurable */ @@ -1208,6 +1214,8 @@ /* WAITPIN starts at BIT 8 */ hwirq += 8 - GPMC_NR_NAND_IRQS; + flags = hard_local_irq_save(); + regval = gpmc_read_reg(GPMC_CONFIG); if (rising_edge) regval &= 
~BIT(hwirq); @@ -1215,6 +1223,8 @@ regval |= BIT(hwirq); gpmc_write_reg(GPMC_CONFIG, regval); + + hard_local_irq_restore(flags); } static void gpmc_irq_ack(struct irq_data *d) @@ -1294,7 +1304,7 @@ hwirq, virq); } - generic_handle_irq(virq); + ipipe_handle_demuxed_irq(virq); } } @@ -1322,6 +1332,7 @@ gpmc->irq_chip.irq_mask = gpmc_irq_mask; gpmc->irq_chip.irq_unmask = gpmc_irq_unmask; gpmc->irq_chip.irq_set_type = gpmc_irq_set_type; + gpmc->irq_chip.flags |= IRQCHIP_PIPELINE_SAFE; gpmc_irq_domain = irq_domain_add_linear(gpmc->dev->of_node, gpmc->nirqs, diff -ruN linux-org/drivers/pci/host/pcie-altera.c linux/drivers/pci/host/pcie-altera.c --- linux-org/drivers/pci/host/pcie-altera.c 2022-03-25 09:55:40.445458936 +0100 +++ linux/drivers/pci/host/pcie-altera.c 2022-03-25 10:15:23.349003026 +0100 @@ -486,7 +486,7 @@ virq = irq_find_mapping(pcie->irq_domain, bit); if (virq) - generic_handle_irq(virq); + ipipe_handle_demuxed_irq(virq); else dev_err(dev, "unexpected IRQ, INT%d\n", bit); } diff -ruN linux-org/drivers/pinctrl/bcm/pinctrl-bcm2835.c linux/drivers/pinctrl/bcm/pinctrl-bcm2835.c --- linux-org/drivers/pinctrl/bcm/pinctrl-bcm2835.c 2022-03-25 09:55:40.493458756 +0100 +++ linux/drivers/pinctrl/bcm/pinctrl-bcm2835.c 2022-03-25 10:15:23.349003026 +0100 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -92,7 +93,11 @@ struct gpio_chip gpio_chip; struct pinctrl_gpio_range gpio_range; - spinlock_t irq_lock[BCM2835_NUM_BANKS]; +#ifdef CONFIG_IPIPE + ipipe_spinlock_t irq_lock[BCM2835_NUM_BANKS]; +#else + raw_spinlock_t irq_lock[BCM2835_NUM_BANKS]; +#endif }; /* pins are just named GPIO0..GPIO53 */ @@ -379,7 +384,7 @@ events &= pc->enabled_irq_map[bank]; for_each_set_bit(offset, &events, 32) { gpio = (32 * bank) + offset; - generic_handle_irq(irq_linear_revmap(pc->gpio_chip.irqdomain, + ipipe_handle_demuxed_irq(irq_linear_revmap(pc->gpio_chip.irqdomain, gpio)); } } @@ -471,10 +476,11 @@ unsigned bank = GPIO_REG_OFFSET(gpio); unsigned long flags; - spin_lock_irqsave(&pc->irq_lock[bank], flags); + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); set_bit(offset, &pc->enabled_irq_map[bank]); bcm2835_gpio_irq_config(pc, gpio, true); - spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + ipipe_unlock_irq(data->irq); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); } static void bcm2835_gpio_irq_disable(struct irq_data *data) @@ -486,12 +492,13 @@ unsigned bank = GPIO_REG_OFFSET(gpio); unsigned long flags; - spin_lock_irqsave(&pc->irq_lock[bank], flags); + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); bcm2835_gpio_irq_config(pc, gpio, false); /* Clear events that were latched prior to clearing event sources */ bcm2835_gpio_set_bit(pc, GPEDS0, gpio); clear_bit(offset, &pc->enabled_irq_map[bank]); - spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + ipipe_lock_irq(data->irq); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); } static int __bcm2835_gpio_irq_set_type_disabled(struct bcm2835_pinctrl *pc, @@ -594,7 +601,7 @@ unsigned long flags; int ret; - spin_lock_irqsave(&pc->irq_lock[bank], flags); + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); if (test_bit(offset, &pc->enabled_irq_map[bank])) ret = __bcm2835_gpio_irq_set_type_enabled(pc, gpio, type); @@ -606,7 +613,7 @@ else irq_set_handler_locked(data, handle_level_irq); - spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); return ret; } @@ -620,6 +627,39 @@ bcm2835_gpio_set_bit(pc, GPEDS0, gpio); } +#ifdef CONFIG_IPIPE + 
+static void bcm2835_gpio_irq_hold(struct irq_data *data) +{ + struct bcm2835_pinctrl *pc = irq_data_get_irq_chip_data(data); + unsigned gpio = irqd_to_hwirq(data); + unsigned offset = GPIO_REG_SHIFT(gpio); + unsigned bank = GPIO_REG_OFFSET(gpio); + unsigned long flags; + + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); + bcm2835_gpio_irq_config(pc, gpio, false); + bcm2835_gpio_set_bit(pc, GPEDS0, gpio); + clear_bit(offset, &pc->enabled_irq_map[bank]); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); +} + +static void bcm2835_gpio_irq_release(struct irq_data *data) +{ + struct bcm2835_pinctrl *pc = irq_data_get_irq_chip_data(data); + unsigned gpio = irqd_to_hwirq(data); + unsigned offset = GPIO_REG_SHIFT(gpio); + unsigned bank = GPIO_REG_OFFSET(gpio); + unsigned long flags; + + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); + set_bit(offset, &pc->enabled_irq_map[bank]); + bcm2835_gpio_irq_config(pc, gpio, true); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); +} + +#endif + static struct irq_chip bcm2835_gpio_irq_chip = { .name = MODULE_NAME, .irq_enable = bcm2835_gpio_irq_enable, @@ -628,6 +668,11 @@ .irq_ack = bcm2835_gpio_irq_ack, .irq_mask = bcm2835_gpio_irq_disable, .irq_unmask = bcm2835_gpio_irq_enable, +#ifdef CONFIG_IPIPE + .irq_hold = bcm2835_gpio_irq_hold, + .irq_release = bcm2835_gpio_irq_release, +#endif + .flags = IRQCHIP_PIPELINE_SAFE, }; static int bcm2835_pctl_get_groups_count(struct pinctrl_dev *pctldev) @@ -1021,7 +1066,7 @@ for_each_set_bit(offset, &events, 32) bcm2835_gpio_wr(pc, GPEDS0 + i * 4, BIT(offset)); - spin_lock_init(&pc->irq_lock[i]); + raw_spin_lock_init(&pc->irq_lock[i]); } err = gpiochip_add_data(&pc->gpio_chip, pc); diff -ruN linux-org/drivers/pinctrl/pinctrl-rockchip.c linux/drivers/pinctrl/pinctrl-rockchip.c --- linux-org/drivers/pinctrl/pinctrl-rockchip.c 2022-03-25 09:55:40.521458650 +0100 +++ linux/drivers/pinctrl/pinctrl-rockchip.c 2022-03-25 10:15:23.349003026 +0100 @@ -2637,7 +2637,7 @@ u32 polarity; u32 level; u32 data; - unsigned long flags; + unsigned long flags, flags2; int ret; /* make sure the pin is configured as gpio input */ @@ -2660,7 +2660,7 @@ irq_set_handler_locked(d, handle_level_irq); raw_spin_lock_irqsave(&bank->slock, flags); - irq_gc_lock(gc); + flags2 = irq_gc_lock(gc); level = readl_relaxed(gc->reg_base + GPIO_INTTYPE_LEVEL); polarity = readl_relaxed(gc->reg_base + GPIO_INT_POLARITY); @@ -2701,7 +2701,7 @@ polarity &= ~mask; break; default: - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags2); raw_spin_unlock_irqrestore(&bank->slock, flags); clk_disable(bank->clk); return -EINVAL; @@ -2710,7 +2710,7 @@ writel_relaxed(level, gc->reg_base + GPIO_INTTYPE_LEVEL); writel_relaxed(polarity, gc->reg_base + GPIO_INT_POLARITY); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags2); raw_spin_unlock_irqrestore(&bank->slock, flags); clk_disable(bank->clk); diff -ruN linux-org/drivers/pinctrl/pinctrl-single.c linux/drivers/pinctrl/pinctrl-single.c --- linux-org/drivers/pinctrl/pinctrl-single.c 2022-03-25 09:55:40.525458634 +0100 +++ linux/drivers/pinctrl/pinctrl-single.c 2022-03-25 10:15:23.349003026 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -182,7 +183,11 @@ #define PCS_FEAT_PINCONF (1 << 0) struct property *missing_nr_pinctrl_cells; struct pcs_soc_data socdata; +#ifdef CONFIG_IPIPE + ipipe_spinlock_t lock; +#else /* !IPIPE */ raw_spinlock_t lock; +#endif /* !IPIPE */ struct mutex mutex; unsigned width; unsigned fmask; @@ -1417,7 +1422,7 @@ mask = pcs->read(pcswi->reg); 
raw_spin_unlock(&pcs->lock); if (mask & pcs_soc->irq_status_mask) { - generic_handle_irq(irq_find_mapping(pcs->domain, + ipipe_handle_demuxed_irq(irq_find_mapping(pcs->domain, pcswi->hwirq)); count++; } @@ -1437,8 +1442,14 @@ static irqreturn_t pcs_irq_handler(int irq, void *d) { struct pcs_soc_data *pcs_soc = d; + unsigned long flags; + irqreturn_t ret; - return pcs_irq_handle(pcs_soc) ? IRQ_HANDLED : IRQ_NONE; + flags = hard_cond_local_irq_save(); + ret = pcs_irq_handle(pcs_soc) ? IRQ_HANDLED : IRQ_NONE; + hard_cond_local_irq_restore(flags); + + return ret; } /** diff -ruN linux-org/drivers/pinctrl/sunxi/pinctrl-sunxi.c linux/drivers/pinctrl/sunxi/pinctrl-sunxi.c --- linux-org/drivers/pinctrl/sunxi/pinctrl-sunxi.c 2022-03-25 09:55:40.561458499 +0100 +++ linux/drivers/pinctrl/sunxi/pinctrl-sunxi.c 2022-03-25 10:15:23.349003026 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -934,14 +935,33 @@ .irq_request_resources = sunxi_pinctrl_irq_request_resources, .irq_release_resources = sunxi_pinctrl_irq_release_resources, .irq_set_type = sunxi_pinctrl_irq_set_type, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, }; +#ifdef CONFIG_IPIPE + +static void sunxi_pinctrl_irq_hold(struct irq_data *d) +{ + sunxi_pinctrl_irq_mask(d); + sunxi_pinctrl_irq_ack(d); +} + +static void sunxi_pinctrl_irq_release(struct irq_data *d) +{ + sunxi_pinctrl_irq_unmask(d); +} + +#endif + static struct irq_chip sunxi_pinctrl_level_irq_chip = { .name = "sunxi_pio_level", .irq_eoi = sunxi_pinctrl_irq_ack, .irq_mask = sunxi_pinctrl_irq_mask, .irq_unmask = sunxi_pinctrl_irq_unmask, +#ifdef CONFIG_IPIPE + .irq_hold = sunxi_pinctrl_irq_hold, + .irq_release = sunxi_pinctrl_irq_release, +#endif /* Define irq_enable / disable to avoid spurious irqs for drivers * using these to suppress irqs while they clear the irq source */ .irq_enable = sunxi_pinctrl_irq_ack_unmask, @@ -950,7 +970,7 @@ .irq_release_resources = sunxi_pinctrl_irq_release_resources, .irq_set_type = sunxi_pinctrl_irq_set_type, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_EOI_THREADED | - IRQCHIP_EOI_IF_HANDLED, + IRQCHIP_EOI_IF_HANDLED | IRQCHIP_PIPELINE_SAFE, }; static int sunxi_pinctrl_irq_of_xlate(struct irq_domain *d, @@ -1008,7 +1028,7 @@ for_each_set_bit(irqoffset, &val, IRQ_PER_BANK) { int pin_irq = irq_find_mapping(pctl->domain, bank * IRQ_PER_BANK + irqoffset); - generic_handle_irq(pin_irq); + ipipe_handle_demuxed_irq(pin_irq); } chained_irq_exit(chip, desc); } diff -ruN linux-org/drivers/pinctrl/sunxi/pinctrl-sunxi.h linux/drivers/pinctrl/sunxi/pinctrl-sunxi.h --- linux-org/drivers/pinctrl/sunxi/pinctrl-sunxi.h 2022-03-25 09:55:40.561458499 +0100 +++ linux/drivers/pinctrl/sunxi/pinctrl-sunxi.h 2022-03-25 10:15:23.349003026 +0100 @@ -137,7 +137,11 @@ unsigned ngroups; int *irq; unsigned *irq_array; +#ifdef CONFIG_IPIPE + ipipe_spinlock_t lock; +#else raw_spinlock_t lock; +#endif struct pinctrl_dev *pctl_dev; unsigned long variant; }; diff -ruN linux-org/drivers/soc/dove/pmu.c linux/drivers/soc/dove/pmu.c --- linux-org/drivers/soc/dove/pmu.c 2022-03-25 09:55:40.937457082 +0100 +++ linux/drivers/soc/dove/pmu.c 2022-03-25 10:15:23.353003010 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #define NR_PMU_IRQS 7 @@ -231,6 +232,7 @@ void __iomem *base = gc->reg_base; u32 stat = readl_relaxed(base + PMC_IRQ_CAUSE) & gc->mask_cache; u32 done = ~0; + unsigned long flags; if (stat == 0) { handle_bad_irq(desc); @@ -243,7 +245,7 @@ stat &= ~(1 << hwirq); done &= ~(1 << hwirq); 
- generic_handle_irq(irq_find_mapping(domain, hwirq)); + ipipe_handle_demuxed_irq(irq_find_mapping(domain, hwirq)); } /* @@ -257,10 +259,10 @@ * So, let's structure the code so that the window is as small as * possible. */ - irq_gc_lock(gc); + flags = irq_gc_lock(gc); done &= readl_relaxed(base + PMC_IRQ_CAUSE); writel_relaxed(done, base + PMC_IRQ_CAUSE); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } static int __init dove_init_pmu_irq(struct pmu_data *pmu, int irq) @@ -296,6 +298,7 @@ gc->chip_types[0].regs.mask = PMC_IRQ_MASK; gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[0].chip.flags |= IRQCHIP_PIPELINE_SAFE; pmu->irq_domain = domain; pmu->irq_gc = gc; diff -ruN linux-org/drivers/tty/serial/8250/8250_core.c linux/drivers/tty/serial/8250/8250_core.c --- linux-org/drivers/tty/serial/8250/8250_core.c 2022-03-25 09:55:41.501454958 +0100 +++ linux/drivers/tty/serial/8250/8250_core.c 2022-03-25 10:15:23.353003010 +0100 @@ -597,6 +597,48 @@ serial8250_console_write(up, s, count); } +#ifdef CONFIG_RAW_PRINTK + +static void raw_write_char(struct uart_8250_port *up, int c) +{ + unsigned int status, tmout = 10000; + + for (;;) { + status = serial_in(up, UART_LSR); + up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; + if ((status & UART_LSR_THRE) == UART_LSR_THRE) + break; + if (--tmout == 0) + break; + cpu_relax(); + } + serial_port_out(&up->port, UART_TX, c); +} + +static void univ8250_console_write_raw(struct console *co, const char *s, + unsigned int count) +{ + struct uart_8250_port *up = &serial8250_ports[co->index]; + unsigned int ier; + + ier = serial_in(up, UART_IER); + + if (up->capabilities & UART_CAP_UUE) + serial_out(up, UART_IER, UART_IER_UUE); + else + serial_out(up, UART_IER, 0); + + while (count-- > 0) { + if (*s == '\n') + raw_write_char(up, '\r'); + raw_write_char(up, *s++); + } + + serial_out(up, UART_IER, ier); +} + +#endif + static int univ8250_console_setup(struct console *co, char *options) { struct uart_port *port; @@ -678,7 +720,12 @@ .device = uart_console_device, .setup = univ8250_console_setup, .match = univ8250_console_match, +#ifdef CONFIG_RAW_PRINTK + .write_raw = univ8250_console_write_raw, + .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_RAW, +#else .flags = CON_PRINTBUFFER | CON_ANYTIME, +#endif .index = -1, .data = &serial8250_reg, }; diff -ruN linux-org/drivers/tty/serial/amba-pl011.c linux/drivers/tty/serial/amba-pl011.c --- linux-org/drivers/tty/serial/amba-pl011.c 2022-03-25 09:55:41.509454928 +0100 +++ linux/drivers/tty/serial/amba-pl011.c 2022-03-25 10:15:23.353003010 +0100 @@ -2230,6 +2230,42 @@ pl011_write(ch, uap, REG_DR); } +#ifdef CONFIG_RAW_PRINTK + +#define pl011_clk_setup(clk) clk_prepare_enable(clk) +#define pl011_clk_enable(clk) do { } while (0) +#define pl011_clk_disable(clk) do { } while (0) + +static void +pl011_console_write_raw(struct console *co, const char *s, unsigned int count) +{ + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr, new_cr, status; + + old_cr = readw(uap->port.membase + UART011_CR); + new_cr = old_cr & ~UART011_CR_CTSEN; + new_cr |= UART01x_CR_UARTEN | UART011_CR_TXE; + writew(new_cr, uap->port.membase + UART011_CR); + + while (count-- > 0) { + if (*s == '\n') + pl011_console_putchar(&uap->port, '\r'); + pl011_console_putchar(&uap->port, *s++); + } + do + status = readw(uap->port.membase + UART01x_FR); + while (status & UART01x_FR_BUSY); + writew(old_cr, uap->port.membase + UART011_CR); +} + +#else /* 
!CONFIG_RAW_PRINTK */ + +#define pl011_clk_setup(clk) clk_prepare(clk) +#define pl011_clk_enable(clk) clk_enable(clk) +#define pl011_clk_disable(clk) clk_disable(clk) + +#endif /* !CONFIG_RAW_PRINTK */ + static void pl011_console_write(struct console *co, const char *s, unsigned int count) { @@ -2238,7 +2274,7 @@ unsigned long flags; int locked = 1; - clk_enable(uap->clk); + pl011_clk_enable(uap->clk); local_irq_save(flags); if (uap->port.sysrq) @@ -2275,7 +2311,7 @@ spin_unlock(&uap->port.lock); local_irq_restore(flags); - clk_disable(uap->clk); + pl011_clk_disable(uap->clk); } static void __init @@ -2336,7 +2372,7 @@ /* Allow pins to be muxed in and configured */ pinctrl_pm_select_default_state(uap->port.dev); - ret = clk_prepare(uap->clk); + ret = pl011_clk_setup(uap->clk); if (ret) return ret; @@ -2430,7 +2466,12 @@ .device = uart_console_device, .setup = pl011_console_setup, .match = pl011_console_match, +#ifdef CONFIG_RAW_PRINTK + .write_raw = pl011_console_write_raw, + .flags = CON_PRINTBUFFER | CON_RAW | CON_ANYTIME, +#else .flags = CON_PRINTBUFFER | CON_ANYTIME, +#endif .index = -1, .data = &amba_reg, }; diff -ruN linux-org/drivers/tty/serial/amba-pl011.c.orig linux/drivers/tty/serial/amba-pl011.c.orig --- linux-org/drivers/tty/serial/amba-pl011.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux/drivers/tty/serial/amba-pl011.c.orig 2022-03-25 10:14:34.569186766 +0100 @@ -0,0 +1,2877 @@ +/* + * Driver for AMBA serial ports + * + * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. + * + * Copyright 1999 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * Copyright (C) 2010 ST-Ericsson SA + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This is a generic driver for ARM AMBA-type serial ports. They + * have a lot of 16550-like features, but are not register compatible. + * Note that although they do have CTS, DCD and DSR inputs, they do + * not have an RI input, nor do they have DTR or RTS outputs. If + * required, these have to be supplied via some other means (eg, GPIO) + * and hooked into this driver. 
+ */ + + +#if defined(CONFIG_SERIAL_AMBA_PL011_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "amba-pl011.h" + +#define UART_NR 14 + +#define SERIAL_AMBA_MAJOR 204 +#define SERIAL_AMBA_MINOR 64 +#define SERIAL_AMBA_NR UART_NR + +#define AMBA_ISR_PASS_LIMIT 256 + +#define UART_DR_ERROR (UART011_DR_OE|UART011_DR_BE|UART011_DR_PE|UART011_DR_FE) +#define UART_DUMMY_DR_RX (1 << 16) + +static u16 pl011_std_offsets[REG_ARRAY_SIZE] = { + [REG_DR] = UART01x_DR, + [REG_FR] = UART01x_FR, + [REG_LCRH_RX] = UART011_LCRH, + [REG_LCRH_TX] = UART011_LCRH, + [REG_IBRD] = UART011_IBRD, + [REG_FBRD] = UART011_FBRD, + [REG_CR] = UART011_CR, + [REG_IFLS] = UART011_IFLS, + [REG_IMSC] = UART011_IMSC, + [REG_RIS] = UART011_RIS, + [REG_MIS] = UART011_MIS, + [REG_ICR] = UART011_ICR, + [REG_DMACR] = UART011_DMACR, +}; + +/* There is by now at least one vendor with differing details, so handle it */ +struct vendor_data { + const u16 *reg_offset; + unsigned int ifls; + unsigned int fr_busy; + unsigned int fr_dsr; + unsigned int fr_cts; + unsigned int fr_ri; + unsigned int inv_fr; + bool access_32b; + bool oversampling; + bool dma_threshold; + bool cts_event_workaround; + bool always_enabled; + bool fixed_options; + + unsigned int (*get_fifosize)(struct amba_device *dev); +}; + +static unsigned int get_fifosize_arm(struct amba_device *dev) +{ + return amba_rev(dev) < 3 ? 16 : 32; +} + +static struct vendor_data vendor_arm = { + .reg_offset = pl011_std_offsets, + .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, + .fr_busy = UART01x_FR_BUSY, + .fr_dsr = UART01x_FR_DSR, + .fr_cts = UART01x_FR_CTS, + .fr_ri = UART011_FR_RI, + .oversampling = false, + .dma_threshold = false, + .cts_event_workaround = false, + .always_enabled = false, + .fixed_options = false, + .get_fifosize = get_fifosize_arm, +}; + +static const struct vendor_data vendor_sbsa = { + .reg_offset = pl011_std_offsets, + .fr_busy = UART01x_FR_BUSY, + .fr_dsr = UART01x_FR_DSR, + .fr_cts = UART01x_FR_CTS, + .fr_ri = UART011_FR_RI, + .access_32b = true, + .oversampling = false, + .dma_threshold = false, + .cts_event_workaround = false, + .always_enabled = true, + .fixed_options = true, +}; + +#ifdef CONFIG_ACPI_SPCR_TABLE +static const struct vendor_data vendor_qdt_qdf2400_e44 = { + .reg_offset = pl011_std_offsets, + .fr_busy = UART011_FR_TXFE, + .fr_dsr = UART01x_FR_DSR, + .fr_cts = UART01x_FR_CTS, + .fr_ri = UART011_FR_RI, + .inv_fr = UART011_FR_TXFE, + .access_32b = true, + .oversampling = false, + .dma_threshold = false, + .cts_event_workaround = false, + .always_enabled = true, + .fixed_options = true, +}; +#endif + +static u16 pl011_st_offsets[REG_ARRAY_SIZE] = { + [REG_DR] = UART01x_DR, + [REG_ST_DMAWM] = ST_UART011_DMAWM, + [REG_ST_TIMEOUT] = ST_UART011_TIMEOUT, + [REG_FR] = UART01x_FR, + [REG_LCRH_RX] = ST_UART011_LCRH_RX, + [REG_LCRH_TX] = ST_UART011_LCRH_TX, + [REG_IBRD] = UART011_IBRD, + [REG_FBRD] = UART011_FBRD, + [REG_CR] = UART011_CR, + [REG_IFLS] = UART011_IFLS, + [REG_IMSC] = UART011_IMSC, + [REG_RIS] = UART011_RIS, + [REG_MIS] = UART011_MIS, + [REG_ICR] = UART011_ICR, + [REG_DMACR] = UART011_DMACR, + [REG_ST_XFCR] = ST_UART011_XFCR, + [REG_ST_XON1] = ST_UART011_XON1, + [REG_ST_XON2] = ST_UART011_XON2, + [REG_ST_XOFF1] = ST_UART011_XOFF1, + [REG_ST_XOFF2] = 
ST_UART011_XOFF2, + [REG_ST_ITCR] = ST_UART011_ITCR, + [REG_ST_ITIP] = ST_UART011_ITIP, + [REG_ST_ABCR] = ST_UART011_ABCR, + [REG_ST_ABIMSC] = ST_UART011_ABIMSC, +}; + +static unsigned int get_fifosize_st(struct amba_device *dev) +{ + return 64; +} + +static struct vendor_data vendor_st = { + .reg_offset = pl011_st_offsets, + .ifls = UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF, + .fr_busy = UART01x_FR_BUSY, + .fr_dsr = UART01x_FR_DSR, + .fr_cts = UART01x_FR_CTS, + .fr_ri = UART011_FR_RI, + .oversampling = true, + .dma_threshold = true, + .cts_event_workaround = true, + .always_enabled = false, + .fixed_options = false, + .get_fifosize = get_fifosize_st, +}; + +static const u16 pl011_zte_offsets[REG_ARRAY_SIZE] = { + [REG_DR] = ZX_UART011_DR, + [REG_FR] = ZX_UART011_FR, + [REG_LCRH_RX] = ZX_UART011_LCRH, + [REG_LCRH_TX] = ZX_UART011_LCRH, + [REG_IBRD] = ZX_UART011_IBRD, + [REG_FBRD] = ZX_UART011_FBRD, + [REG_CR] = ZX_UART011_CR, + [REG_IFLS] = ZX_UART011_IFLS, + [REG_IMSC] = ZX_UART011_IMSC, + [REG_RIS] = ZX_UART011_RIS, + [REG_MIS] = ZX_UART011_MIS, + [REG_ICR] = ZX_UART011_ICR, + [REG_DMACR] = ZX_UART011_DMACR, +}; + +static unsigned int get_fifosize_zte(struct amba_device *dev) +{ + return 16; +} + +static struct vendor_data vendor_zte = { + .reg_offset = pl011_zte_offsets, + .access_32b = true, + .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, + .fr_busy = ZX_UART01x_FR_BUSY, + .fr_dsr = ZX_UART01x_FR_DSR, + .fr_cts = ZX_UART01x_FR_CTS, + .fr_ri = ZX_UART011_FR_RI, + .get_fifosize = get_fifosize_zte, +}; + +/* Deals with DMA transactions */ + +struct pl011_sgbuf { + struct scatterlist sg; + char *buf; +}; + +struct pl011_dmarx_data { + struct dma_chan *chan; + struct completion complete; + bool use_buf_b; + struct pl011_sgbuf sgbuf_a; + struct pl011_sgbuf sgbuf_b; + dma_cookie_t cookie; + bool running; + struct timer_list timer; + unsigned int last_residue; + unsigned long last_jiffies; + bool auto_poll_rate; + unsigned int poll_rate; + unsigned int poll_timeout; +}; + +struct pl011_dmatx_data { + struct dma_chan *chan; + struct scatterlist sg; + char *buf; + bool queued; +}; + +/* + * We wrap our port structure around the generic uart_port. + */ +struct uart_amba_port { + struct uart_port port; + const u16 *reg_offset; + struct clk *clk; + const struct vendor_data *vendor; + unsigned int dmacr; /* dma control reg */ + unsigned int im; /* interrupt mask */ + unsigned int old_status; + unsigned int fifosize; /* vendor-specific */ + unsigned int old_cr; /* state during shutdown */ + bool autorts; + unsigned int fixed_baud; /* vendor-set fixed baud rate */ + char type[12]; +#ifdef CONFIG_DMA_ENGINE + /* DMA stuff */ + bool using_tx_dma; + bool using_rx_dma; + struct pl011_dmarx_data dmarx; + struct pl011_dmatx_data dmatx; + bool dma_probed; +#endif +}; + +static unsigned int pl011_reg_to_offset(const struct uart_amba_port *uap, + unsigned int reg) +{ + return uap->reg_offset[reg]; +} + +static unsigned int pl011_read(const struct uart_amba_port *uap, + unsigned int reg) +{ + void __iomem *addr = uap->port.membase + pl011_reg_to_offset(uap, reg); + + return (uap->port.iotype == UPIO_MEM32) ? 
+ readl_relaxed(addr) : readw_relaxed(addr); +} + +static void pl011_write(unsigned int val, const struct uart_amba_port *uap, + unsigned int reg) +{ + void __iomem *addr = uap->port.membase + pl011_reg_to_offset(uap, reg); + + if (uap->port.iotype == UPIO_MEM32) + writel_relaxed(val, addr); + else + writew_relaxed(val, addr); +} + +/* + * Reads up to 256 characters from the FIFO or until it's empty and + * inserts them into the TTY layer. Returns the number of characters + * read from the FIFO. + */ +static int pl011_fifo_to_tty(struct uart_amba_port *uap) +{ + u16 status; + unsigned int ch, flag, max_count = 256; + int fifotaken = 0; + + while (max_count--) { + status = pl011_read(uap, REG_FR); + if (status & UART01x_FR_RXFE) + break; + + /* Take chars from the FIFO and update status */ + ch = pl011_read(uap, REG_DR) | UART_DUMMY_DR_RX; + flag = TTY_NORMAL; + uap->port.icount.rx++; + fifotaken++; + + if (unlikely(ch & UART_DR_ERROR)) { + if (ch & UART011_DR_BE) { + ch &= ~(UART011_DR_FE | UART011_DR_PE); + uap->port.icount.brk++; + if (uart_handle_break(&uap->port)) + continue; + } else if (ch & UART011_DR_PE) + uap->port.icount.parity++; + else if (ch & UART011_DR_FE) + uap->port.icount.frame++; + if (ch & UART011_DR_OE) + uap->port.icount.overrun++; + + ch &= uap->port.read_status_mask; + + if (ch & UART011_DR_BE) + flag = TTY_BREAK; + else if (ch & UART011_DR_PE) + flag = TTY_PARITY; + else if (ch & UART011_DR_FE) + flag = TTY_FRAME; + } + + if (uart_handle_sysrq_char(&uap->port, ch & 255)) + continue; + + uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); + } + + return fifotaken; +} + + +/* + * All the DMA operation mode stuff goes inside this ifdef. + * This assumes that you have a generic DMA device interface, + * no custom DMA interfaces are supported. 
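The receive loop above folds the PL011 error bits carried alongside each data-register word into one TTY flag per character, giving break priority over parity and framing errors while overruns are only counted. A standalone sketch of that decoding, leaving out the sysrq and read_status_mask handling; the DR_* bit positions below are stand-ins chosen to mirror the UART011_DR_* masks, not values taken from this patch:

#include <stdio.h>

/* Assumed bit layout of a received data-register word (sketch only). */
#define DR_FE (1u << 8)   /* framing error */
#define DR_PE (1u << 9)   /* parity error  */
#define DR_BE (1u << 10)  /* break         */
#define DR_OE (1u << 11)  /* overrun       */

/* Returns the single flag character the TTY layer would be handed. */
static char classify_dr(unsigned int ch, unsigned int *overruns)
{
	if (ch & DR_OE)
		(*overruns)++;          /* overrun is counted, not flagged */
	if (ch & DR_BE)
		return 'B';             /* TTY_BREAK: break wins over FE/PE */
	if (ch & DR_PE)
		return 'P';             /* TTY_PARITY */
	if (ch & DR_FE)
		return 'F';             /* TTY_FRAME */
	return 'N';                     /* TTY_NORMAL */
}

int main(void)
{
	unsigned int overruns = 0;
	unsigned int samples[] = { 'a', 'b' | DR_PE, 0 | DR_BE, 'c' | DR_OE };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("%#x -> %c\n", samples[i], classify_dr(samples[i], &overruns));
	printf("overruns: %u\n", overruns);
	return 0;
}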
+ */ +#ifdef CONFIG_DMA_ENGINE + +#define PL011_DMA_BUFFER_SIZE PAGE_SIZE + +static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg, + enum dma_data_direction dir) +{ + dma_addr_t dma_addr; + + sg->buf = dma_alloc_coherent(chan->device->dev, + PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL); + if (!sg->buf) + return -ENOMEM; + + sg_init_table(&sg->sg, 1); + sg_set_page(&sg->sg, phys_to_page(dma_addr), + PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr)); + sg_dma_address(&sg->sg) = dma_addr; + sg_dma_len(&sg->sg) = PL011_DMA_BUFFER_SIZE; + + return 0; +} + +static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg, + enum dma_data_direction dir) +{ + if (sg->buf) { + dma_free_coherent(chan->device->dev, + PL011_DMA_BUFFER_SIZE, sg->buf, + sg_dma_address(&sg->sg)); + } +} + +static void pl011_dma_probe(struct uart_amba_port *uap) +{ + /* DMA is the sole user of the platform data right now */ + struct amba_pl011_data *plat = dev_get_platdata(uap->port.dev); + struct device *dev = uap->port.dev; + struct dma_slave_config tx_conf = { + .dst_addr = uap->port.mapbase + + pl011_reg_to_offset(uap, REG_DR), + .dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE, + .direction = DMA_MEM_TO_DEV, + .dst_maxburst = uap->fifosize >> 1, + .device_fc = false, + }; + struct dma_chan *chan; + dma_cap_mask_t mask; + + uap->dma_probed = true; + chan = dma_request_slave_channel_reason(dev, "tx"); + if (IS_ERR(chan)) { + if (PTR_ERR(chan) == -EPROBE_DEFER) { + uap->dma_probed = false; + return; + } + + /* We need platform data */ + if (!plat || !plat->dma_filter) { + dev_info(uap->port.dev, "no DMA platform data\n"); + return; + } + + /* Try to acquire a generic DMA engine slave TX channel */ + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + chan = dma_request_channel(mask, plat->dma_filter, + plat->dma_tx_param); + if (!chan) { + dev_err(uap->port.dev, "no TX DMA channel!\n"); + return; + } + } + + dmaengine_slave_config(chan, &tx_conf); + uap->dmatx.chan = chan; + + dev_info(uap->port.dev, "DMA channel TX %s\n", + dma_chan_name(uap->dmatx.chan)); + + /* Optionally make use of an RX channel as well */ + chan = dma_request_slave_channel(dev, "rx"); + + if (!chan && plat && plat->dma_rx_param) { + chan = dma_request_channel(mask, plat->dma_filter, plat->dma_rx_param); + + if (!chan) { + dev_err(uap->port.dev, "no RX DMA channel!\n"); + return; + } + } + + if (chan) { + struct dma_slave_config rx_conf = { + .src_addr = uap->port.mapbase + + pl011_reg_to_offset(uap, REG_DR), + .src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE, + .direction = DMA_DEV_TO_MEM, + .src_maxburst = uap->fifosize >> 2, + .device_fc = false, + }; + struct dma_slave_caps caps; + + /* + * Some DMA controllers provide information on their capabilities. + * If the controller does, check for suitable residue processing + * otherwise assime all is well. + */ + if (0 == dma_get_slave_caps(chan, &caps)) { + if (caps.residue_granularity == + DMA_RESIDUE_GRANULARITY_DESCRIPTOR) { + dma_release_channel(chan); + dev_info(uap->port.dev, + "RX DMA disabled - no residue processing\n"); + return; + } + } + dmaengine_slave_config(chan, &rx_conf); + uap->dmarx.chan = chan; + + uap->dmarx.auto_poll_rate = false; + if (plat && plat->dma_rx_poll_enable) { + /* Set poll rate if specified. */ + if (plat->dma_rx_poll_rate) { + uap->dmarx.auto_poll_rate = false; + uap->dmarx.poll_rate = plat->dma_rx_poll_rate; + } else { + /* + * 100 ms defaults to poll rate if not + * specified. This will be adjusted with + * the baud rate at set_termios. 
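pl011_dma_probe() above derives both slave configurations purely from the port geometry: transfers target the data register, move one byte per FIFO entry, and burst half a FIFO for TX but only a quarter for RX. A minimal sketch of that derivation; struct slave_conf is a stand-in for the real struct dma_slave_config and the base address used in main() is a made-up example:

#include <stdio.h>

/* Stand-in for struct dma_slave_config; names and values are illustrative. */
struct slave_conf {
	unsigned long addr;        /* bus address of the data register */
	unsigned int addr_width;   /* one byte per FIFO entry          */
	unsigned int maxburst;     /* FIFO entries moved per burst     */
};

static struct slave_conf pl011_slave_conf(unsigned long mapbase,
					  unsigned int dr_offset,
					  unsigned int fifosize, int is_tx)
{
	struct slave_conf c = {
		.addr = mapbase + dr_offset,
		.addr_width = 1,
		/* TX bursts fill half the FIFO, RX bursts drain a quarter. */
		.maxburst = is_tx ? fifosize >> 1 : fifosize >> 2,
	};
	return c;
}

int main(void)
{
	/* 0x3f201000 is only an example base address, 32 a typical FIFO depth. */
	struct slave_conf tx = pl011_slave_conf(0x3f201000, 0, 32, 1);
	struct slave_conf rx = pl011_slave_conf(0x3f201000, 0, 32, 0);

	printf("TX burst %u, RX burst %u, DR at %#lx\n",
	       tx.maxburst, rx.maxburst, tx.addr);
	return 0;
}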
+ */ + uap->dmarx.auto_poll_rate = true; + uap->dmarx.poll_rate = 100; + } + /* 3 secs defaults poll_timeout if not specified. */ + if (plat->dma_rx_poll_timeout) + uap->dmarx.poll_timeout = + plat->dma_rx_poll_timeout; + else + uap->dmarx.poll_timeout = 3000; + } else if (!plat && dev->of_node) { + uap->dmarx.auto_poll_rate = of_property_read_bool( + dev->of_node, "auto-poll"); + if (uap->dmarx.auto_poll_rate) { + u32 x; + + if (0 == of_property_read_u32(dev->of_node, + "poll-rate-ms", &x)) + uap->dmarx.poll_rate = x; + else + uap->dmarx.poll_rate = 100; + if (0 == of_property_read_u32(dev->of_node, + "poll-timeout-ms", &x)) + uap->dmarx.poll_timeout = x; + else + uap->dmarx.poll_timeout = 3000; + } + } + dev_info(uap->port.dev, "DMA channel RX %s\n", + dma_chan_name(uap->dmarx.chan)); + } +} + +static void pl011_dma_remove(struct uart_amba_port *uap) +{ + if (uap->dmatx.chan) + dma_release_channel(uap->dmatx.chan); + if (uap->dmarx.chan) + dma_release_channel(uap->dmarx.chan); +} + +/* Forward declare these for the refill routine */ +static int pl011_dma_tx_refill(struct uart_amba_port *uap); +static void pl011_start_tx_pio(struct uart_amba_port *uap); + +/* + * The current DMA TX buffer has been sent. + * Try to queue up another DMA buffer. + */ +static void pl011_dma_tx_callback(void *data) +{ + struct uart_amba_port *uap = data; + struct pl011_dmatx_data *dmatx = &uap->dmatx; + unsigned long flags; + u16 dmacr; + + spin_lock_irqsave(&uap->port.lock, flags); + if (uap->dmatx.queued) + dma_unmap_sg(dmatx->chan->device->dev, &dmatx->sg, 1, + DMA_TO_DEVICE); + + dmacr = uap->dmacr; + uap->dmacr = dmacr & ~UART011_TXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + + /* + * If TX DMA was disabled, it means that we've stopped the DMA for + * some reason (eg, XOFF received, or we want to send an X-char.) + * + * Note: we need to be careful here of a potential race between DMA + * and the rest of the driver - if the driver disables TX DMA while + * a TX buffer completing, we must update the tx queued status to + * get further refills (hence we check dmacr). + */ + if (!(dmacr & UART011_TXDMAE) || uart_tx_stopped(&uap->port) || + uart_circ_empty(&uap->port.state->xmit)) { + uap->dmatx.queued = false; + spin_unlock_irqrestore(&uap->port.lock, flags); + return; + } + + if (pl011_dma_tx_refill(uap) <= 0) + /* + * We didn't queue a DMA buffer for some reason, but we + * have data pending to be sent. Re-enable the TX IRQ. + */ + pl011_start_tx_pio(uap); + + spin_unlock_irqrestore(&uap->port.lock, flags); +} + +/* + * Try to refill the TX DMA buffer. + * Locking: called with port lock held and IRQs disabled. + * Returns: + * 1 if we queued up a TX DMA buffer. + * 0 if we didn't want to handle this by DMA + * <0 on error + */ +static int pl011_dma_tx_refill(struct uart_amba_port *uap) +{ + struct pl011_dmatx_data *dmatx = &uap->dmatx; + struct dma_chan *chan = dmatx->chan; + struct dma_device *dma_dev = chan->device; + struct dma_async_tx_descriptor *desc; + struct circ_buf *xmit = &uap->port.state->xmit; + unsigned int count; + + /* + * Try to avoid the overhead involved in using DMA if the + * transaction fits in the first half of the FIFO, by using + * the standard interrupt handling. This ensures that we + * issue a uart_write_wakeup() at the appropriate time. 
+ */ + count = uart_circ_chars_pending(xmit); + if (count < (uap->fifosize >> 1)) { + uap->dmatx.queued = false; + return 0; + } + + /* + * Bodge: don't send the last character by DMA, as this + * will prevent XON from notifying us to restart DMA. + */ + count -= 1; + + /* Else proceed to copy the TX chars to the DMA buffer and fire DMA */ + if (count > PL011_DMA_BUFFER_SIZE) + count = PL011_DMA_BUFFER_SIZE; + + if (xmit->tail < xmit->head) + memcpy(&dmatx->buf[0], &xmit->buf[xmit->tail], count); + else { + size_t first = UART_XMIT_SIZE - xmit->tail; + size_t second; + + if (first > count) + first = count; + second = count - first; + + memcpy(&dmatx->buf[0], &xmit->buf[xmit->tail], first); + if (second) + memcpy(&dmatx->buf[first], &xmit->buf[0], second); + } + + dmatx->sg.length = count; + + if (dma_map_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE) != 1) { + uap->dmatx.queued = false; + dev_dbg(uap->port.dev, "unable to map TX DMA\n"); + return -EBUSY; + } + + desc = dmaengine_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc) { + dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE); + uap->dmatx.queued = false; + /* + * If DMA cannot be used right now, we complete this + * transaction via IRQ and let the TTY layer retry. + */ + dev_dbg(uap->port.dev, "TX DMA busy\n"); + return -EBUSY; + } + + /* Some data to go along to the callback */ + desc->callback = pl011_dma_tx_callback; + desc->callback_param = uap; + + /* All errors should happen at prepare time */ + dmaengine_submit(desc); + + /* Fire the DMA transaction */ + dma_dev->device_issue_pending(chan); + + uap->dmacr |= UART011_TXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + uap->dmatx.queued = true; + + /* + * Now we know that DMA will fire, so advance the ring buffer + * with the stuff we just dispatched. + */ + xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); + uap->port.icount.tx += count; + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&uap->port); + + return 1; +} + +/* + * We received a transmit interrupt without a pending X-char but with + * pending characters. + * Locking: called with port lock held and IRQs disabled. + * Returns: + * false if we want to use PIO to transmit + * true if we queued a DMA buffer + */ +static bool pl011_dma_tx_irq(struct uart_amba_port *uap) +{ + if (!uap->using_tx_dma) + return false; + + /* + * If we already have a TX buffer queued, but received a + * TX interrupt, it will be because we've just sent an X-char. + * Ensure the TX DMA is enabled and the TX IRQ is disabled. + */ + if (uap->dmatx.queued) { + uap->dmacr |= UART011_TXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + uap->im &= ~UART011_TXIM; + pl011_write(uap->im, uap, REG_IMSC); + return true; + } + + /* + * We don't have a TX buffer queued, so try to queue one. + * If we successfully queued a buffer, mask the TX IRQ. + */ + if (pl011_dma_tx_refill(uap) > 0) { + uap->im &= ~UART011_TXIM; + pl011_write(uap->im, uap, REG_IMSC); + return true; + } + return false; +} + +/* + * Stop the DMA transmit (eg, due to received XOFF). + * Locking: called with port lock held and IRQs disabled. + */ +static inline void pl011_dma_tx_stop(struct uart_amba_port *uap) +{ + if (uap->dmatx.queued) { + uap->dmacr &= ~UART011_TXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + } +} + +/* + * Try to start a DMA transmit, or in the case of an XON/OFF + * character queued for send, try to get that character out ASAP. 
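The refill above linearises the circular transmit buffer into the DMA bounce buffer, holding one character back so a later XON still finds data to restart DMA with, and splitting the copy in two when the pending bytes wrap past the end of the ring. A self-contained model of that copy; the ring size and sample contents are arbitrary:

#include <stdio.h>
#include <string.h>

#define XMIT_SIZE 16u          /* ring size; power of two, like UART_XMIT_SIZE */
#define DMA_BUF_SIZE 64u

static unsigned int pending(unsigned int head, unsigned int tail)
{
	return (head - tail) & (XMIT_SIZE - 1);
}

/* Copy up to DMA_BUF_SIZE chars, but never the last pending one, into dst. */
static unsigned int refill(char *dst, const char *ring,
			   unsigned int head, unsigned int *tail)
{
	unsigned int count = pending(head, *tail);

	if (count == 0)
		return 0;
	count -= 1;                     /* keep one char back for XON handling */
	if (count > DMA_BUF_SIZE)
		count = DMA_BUF_SIZE;

	if (*tail < head) {
		memcpy(dst, ring + *tail, count);
	} else {                        /* data wraps around the end of the ring */
		unsigned int first = XMIT_SIZE - *tail;

		if (first > count)
			first = count;
		memcpy(dst, ring + *tail, first);
		if (count - first)
			memcpy(dst + first, ring, count - first);
	}
	*tail = (*tail + count) & (XMIT_SIZE - 1);
	return count;
}

int main(void)
{
	char ring[XMIT_SIZE + 1] = "ghijklmnopabcdef";  /* "abcdef..." wrapped */
	char dma[DMA_BUF_SIZE];
	unsigned int tail = 10, head = 6;               /* 12 chars pending */
	unsigned int n = refill(dma, ring, head, &tail);

	printf("copied %u chars: %.*s, new tail=%u\n", n, (int)n, dma, tail);
	return 0;
}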
+ * Locking: called with port lock held and IRQs disabled. + * Returns: + * false if we want the TX IRQ to be enabled + * true if we have a buffer queued + */ +static inline bool pl011_dma_tx_start(struct uart_amba_port *uap) +{ + u16 dmacr; + + if (!uap->using_tx_dma) + return false; + + if (!uap->port.x_char) { + /* no X-char, try to push chars out in DMA mode */ + bool ret = true; + + if (!uap->dmatx.queued) { + if (pl011_dma_tx_refill(uap) > 0) { + uap->im &= ~UART011_TXIM; + pl011_write(uap->im, uap, REG_IMSC); + } else + ret = false; + } else if (!(uap->dmacr & UART011_TXDMAE)) { + uap->dmacr |= UART011_TXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + } + return ret; + } + + /* + * We have an X-char to send. Disable DMA to prevent it loading + * the TX fifo, and then see if we can stuff it into the FIFO. + */ + dmacr = uap->dmacr; + uap->dmacr &= ~UART011_TXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + + if (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) { + /* + * No space in the FIFO, so enable the transmit interrupt + * so we know when there is space. Note that once we've + * loaded the character, we should just re-enable DMA. + */ + return false; + } + + pl011_write(uap->port.x_char, uap, REG_DR); + uap->port.icount.tx++; + uap->port.x_char = 0; + + /* Success - restore the DMA state */ + uap->dmacr = dmacr; + pl011_write(dmacr, uap, REG_DMACR); + + return true; +} + +/* + * Flush the transmit buffer. + * Locking: called with port lock held and IRQs disabled. + */ +static void pl011_dma_flush_buffer(struct uart_port *port) +__releases(&uap->port.lock) +__acquires(&uap->port.lock) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + if (!uap->using_tx_dma) + return; + + /* Avoid deadlock with the DMA engine callback */ + spin_unlock(&uap->port.lock); + dmaengine_terminate_all(uap->dmatx.chan); + spin_lock(&uap->port.lock); + if (uap->dmatx.queued) { + dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, + DMA_TO_DEVICE); + uap->dmatx.queued = false; + uap->dmacr &= ~UART011_TXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + } +} + +static void pl011_dma_rx_callback(void *data); + +static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) +{ + struct dma_chan *rxchan = uap->dmarx.chan; + struct pl011_dmarx_data *dmarx = &uap->dmarx; + struct dma_async_tx_descriptor *desc; + struct pl011_sgbuf *sgbuf; + + if (!rxchan) + return -EIO; + + /* Start the RX DMA job */ + sgbuf = uap->dmarx.use_buf_b ? + &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + desc = dmaengine_prep_slave_sg(rxchan, &sgbuf->sg, 1, + DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + /* + * If the DMA engine is busy and cannot prepare a + * channel, no big deal, the driver will fall back + * to interrupt mode as a result of this error code. + */ + if (!desc) { + uap->dmarx.running = false; + dmaengine_terminate_all(rxchan); + return -EBUSY; + } + + /* Some data to go along to the callback */ + desc->callback = pl011_dma_rx_callback; + desc->callback_param = uap; + dmarx->cookie = dmaengine_submit(desc); + dma_async_issue_pending(rxchan); + + uap->dmacr |= UART011_RXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + uap->dmarx.running = true; + + uap->im &= ~UART011_RXIM; + pl011_write(uap->im, uap, REG_IMSC); + + return 0; +} + +/* + * This is called when either the DMA job is complete, or + * the FIFO timeout interrupt occurred. This must be called + * with the port spinlock uap->port.lock held. 
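pl011_dma_tx_start() above is essentially a small decision tree: with no X-char it keeps or starts a DMA buffer and reports that the TX interrupt may stay masked, and with an X-char pending it pauses DMA, tries to push the character straight into the FIFO, and restores the previous DMA state only if that worked. A toy model of that control flow; the field names are stand-ins rather than the driver's, and a refill is simply assumed to succeed:

#include <stdbool.h>
#include <stdio.h>

struct toy_port {
	bool dma_capable;   /* using_tx_dma                */
	bool dma_queued;    /* a DMA buffer is in flight   */
	bool dma_enabled;   /* TXDMAE equivalent           */
	bool fifo_full;     /* TXFF equivalent             */
	int  x_char;        /* pending XON/XOFF, 0 if none */
};

/* Returns true if the TX interrupt may stay masked. */
static bool tx_start(struct toy_port *p)
{
	bool saved;

	if (!p->dma_capable)
		return false;

	if (!p->x_char) {
		if (!p->dma_queued)
			p->dma_queued = true;   /* stands in for a successful refill */
		p->dma_enabled = true;
		return true;
	}

	/* X-char pending: keep DMA away from the FIFO while we try to send it. */
	saved = p->dma_enabled;
	p->dma_enabled = false;
	if (p->fifo_full)
		return false;                   /* wait for the TX IRQ instead */

	printf("sent X-char %#x directly\n", p->x_char);
	p->x_char = 0;
	p->dma_enabled = saved;                 /* restore the previous DMA state */
	return true;
}

int main(void)
{
	struct toy_port p = { .dma_capable = true, .x_char = 0x11 };

	printf("TX IRQ stays masked: %d\n", tx_start(&p));
	return 0;
}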
+ */ +static void pl011_dma_rx_chars(struct uart_amba_port *uap, + u32 pending, bool use_buf_b, + bool readfifo) +{ + struct tty_port *port = &uap->port.state->port; + struct pl011_sgbuf *sgbuf = use_buf_b ? + &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + int dma_count = 0; + u32 fifotaken = 0; /* only used for vdbg() */ + + struct pl011_dmarx_data *dmarx = &uap->dmarx; + int dmataken = 0; + + if (uap->dmarx.poll_rate) { + /* The data can be taken by polling */ + dmataken = sgbuf->sg.length - dmarx->last_residue; + /* Recalculate the pending size */ + if (pending >= dmataken) + pending -= dmataken; + } + + /* Pick the remain data from the DMA */ + if (pending) { + + /* + * First take all chars in the DMA pipe, then look in the FIFO. + * Note that tty_insert_flip_buf() tries to take as many chars + * as it can. + */ + dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + pending); + + uap->port.icount.rx += dma_count; + if (dma_count < pending) + dev_warn(uap->port.dev, + "couldn't insert all characters (TTY is full?)\n"); + } + + /* Reset the last_residue for Rx DMA poll */ + if (uap->dmarx.poll_rate) + dmarx->last_residue = sgbuf->sg.length; + + /* + * Only continue with trying to read the FIFO if all DMA chars have + * been taken first. + */ + if (dma_count == pending && readfifo) { + /* Clear any error flags */ + pl011_write(UART011_OEIS | UART011_BEIS | UART011_PEIS | + UART011_FEIS, uap, REG_ICR); + + /* + * If we read all the DMA'd characters, and we had an + * incomplete buffer, that could be due to an rx error, or + * maybe we just timed out. Read any pending chars and check + * the error status. + * + * Error conditions will only occur in the FIFO, these will + * trigger an immediate interrupt and stop the DMA job, so we + * will always find the error in the FIFO, never in the DMA + * buffer. + */ + fifotaken = pl011_fifo_to_tty(uap); + } + + spin_unlock(&uap->port.lock); + dev_vdbg(uap->port.dev, + "Took %d chars from DMA buffer and %d chars from the FIFO\n", + dma_count, fifotaken); + tty_flip_buffer_push(port); + spin_lock(&uap->port.lock); +} + +static void pl011_dma_rx_irq(struct uart_amba_port *uap) +{ + struct pl011_dmarx_data *dmarx = &uap->dmarx; + struct dma_chan *rxchan = dmarx->chan; + struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? + &dmarx->sgbuf_b : &dmarx->sgbuf_a; + size_t pending; + struct dma_tx_state state; + enum dma_status dmastat; + + /* + * Pause the transfer so we can trust the current counter, + * do this before we pause the PL011 block, else we may + * overflow the FIFO. + */ + if (dmaengine_pause(rxchan)) + dev_err(uap->port.dev, "unable to pause DMA transfer\n"); + dmastat = rxchan->device->device_tx_status(rxchan, + dmarx->cookie, &state); + if (dmastat != DMA_PAUSED) + dev_err(uap->port.dev, "unable to pause DMA transfer\n"); + + /* Disable RX DMA - incoming data will wait in the FIFO */ + uap->dmacr &= ~UART011_RXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); + uap->dmarx.running = false; + + pending = sgbuf->sg.length - state.residue; + BUG_ON(pending > PL011_DMA_BUFFER_SIZE); + /* Then we terminate the transfer - we now know our residue */ + dmaengine_terminate_all(rxchan); + + /* + * This will take the chars we have so far and insert + * into the framework. 
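The RX paths above only trust the DMA engine's residue after pausing the channel: the bytes actually landed are the buffer length minus the residue, the transfer is then terminated, the harvested span is pushed to the TTY layer, and the job is re-armed on the other buffer so the hardware always has somewhere to put data. A small sketch of that pending/swap bookkeeping with an arbitrary buffer length:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define BUF_LEN 256u

struct rx_state {
	bool use_buf_b;        /* which buffer the engine is currently filling */
};

/* Handle one RX timeout/complete event given the engine's reported residue. */
static unsigned int rx_event(struct rx_state *s, unsigned int residue)
{
	unsigned int pending = BUF_LEN - residue;   /* bytes actually written */

	assert(pending <= BUF_LEN);                 /* mirrors the BUG_ON()   */
	printf("drain %u bytes from buffer %c\n", pending, s->use_buf_b ? 'B' : 'A');

	s->use_buf_b = !s->use_buf_b;               /* re-arm on the other buffer */
	return pending;
}

int main(void)
{
	struct rx_state s = { .use_buf_b = false };

	rx_event(&s, 200);      /* timeout with 56 bytes received into A */
	rx_event(&s, 0);        /* buffer B filled completely            */
	return 0;
}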
+ */ + pl011_dma_rx_chars(uap, pending, dmarx->use_buf_b, true); + + /* Switch buffer & re-trigger DMA job */ + dmarx->use_buf_b = !dmarx->use_buf_b; + if (pl011_dma_rx_trigger_dma(uap)) { + dev_dbg(uap->port.dev, "could not retrigger RX DMA job " + "fall back to interrupt mode\n"); + uap->im |= UART011_RXIM; + pl011_write(uap->im, uap, REG_IMSC); + } +} + +static void pl011_dma_rx_callback(void *data) +{ + struct uart_amba_port *uap = data; + struct pl011_dmarx_data *dmarx = &uap->dmarx; + struct dma_chan *rxchan = dmarx->chan; + bool lastbuf = dmarx->use_buf_b; + struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? + &dmarx->sgbuf_b : &dmarx->sgbuf_a; + size_t pending; + struct dma_tx_state state; + int ret; + + /* + * This completion interrupt occurs typically when the + * RX buffer is totally stuffed but no timeout has yet + * occurred. When that happens, we just want the RX + * routine to flush out the secondary DMA buffer while + * we immediately trigger the next DMA job. + */ + spin_lock_irq(&uap->port.lock); + /* + * Rx data can be taken by the UART interrupts during + * the DMA irq handler. So we check the residue here. + */ + rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); + pending = sgbuf->sg.length - state.residue; + BUG_ON(pending > PL011_DMA_BUFFER_SIZE); + /* Then we terminate the transfer - we now know our residue */ + dmaengine_terminate_all(rxchan); + + uap->dmarx.running = false; + dmarx->use_buf_b = !lastbuf; + ret = pl011_dma_rx_trigger_dma(uap); + + pl011_dma_rx_chars(uap, pending, lastbuf, false); + spin_unlock_irq(&uap->port.lock); + /* + * Do this check after we picked the DMA chars so we don't + * get some IRQ immediately from RX. + */ + if (ret) { + dev_dbg(uap->port.dev, "could not retrigger RX DMA job " + "fall back to interrupt mode\n"); + uap->im |= UART011_RXIM; + pl011_write(uap->im, uap, REG_IMSC); + } +} + +/* + * Stop accepting received characters, when we're shutting down or + * suspending this port. + * Locking: called with port lock held and IRQs disabled. + */ +static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) +{ + /* FIXME. Just disable the DMA enable */ + uap->dmacr &= ~UART011_RXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); +} + +/* + * Timer handler for Rx DMA polling. + * Every polling, It checks the residue in the dma buffer and transfer + * data to the tty. Also, last_residue is updated for the next polling. + */ +static void pl011_dma_rx_poll(unsigned long args) +{ + struct uart_amba_port *uap = (struct uart_amba_port *)args; + struct tty_port *port = &uap->port.state->port; + struct pl011_dmarx_data *dmarx = &uap->dmarx; + struct dma_chan *rxchan = uap->dmarx.chan; + unsigned long flags = 0; + unsigned int dmataken = 0; + unsigned int size = 0; + struct pl011_sgbuf *sgbuf; + int dma_count; + struct dma_tx_state state; + + sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); + if (likely(state.residue < dmarx->last_residue)) { + dmataken = sgbuf->sg.length - dmarx->last_residue; + size = dmarx->last_residue - state.residue; + dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + size); + if (dma_count == size) + dmarx->last_residue = state.residue; + dmarx->last_jiffies = jiffies; + } + tty_flip_buffer_push(port); + + /* + * If no data is received in poll_timeout, the driver will fall back + * to interrupt mode. We will retrigger DMA at the first interrupt. 
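The poll handler above never stops the DMA job; it compares the residue reported now against the residue it saw last time, pushes only the newly arrived span, and (as the timeout check just below shows) falls back to interrupt mode once nothing has arrived for poll_timeout milliseconds. A standalone sketch of that residue bookkeeping across successive polls, with made-up numbers:

#include <stdio.h>

#define BUF_LEN 256u

/*
 * One poll step: given the residue the DMA engine reports (bytes it has
 * NOT yet written), work out where the fresh data starts and how long it
 * is, then remember the residue for the next poll.
 */
static unsigned int poll_once(unsigned int *last_residue, unsigned int residue)
{
	unsigned int taken, fresh;

	if (residue >= *last_residue)
		return 0;                       /* nothing new since last poll */

	taken = BUF_LEN - *last_residue;        /* offset of the fresh data */
	fresh = *last_residue - residue;        /* its length */
	printf("insert %u bytes starting at offset %u\n", fresh, taken);

	*last_residue = residue;                /* assume the TTY took it all */
	return fresh;
}

int main(void)
{
	unsigned int last_residue = BUF_LEN;    /* nothing consumed yet */
	unsigned int samples[] = { 200, 150, 150, 40 };  /* residues over time */

	for (unsigned int i = 0; i < 4; i++)
		if (!poll_once(&last_residue, samples[i]))
			printf("no new data; after poll_timeout we would re-enable RX IRQs\n");
	return 0;
}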
+ */ + if (jiffies_to_msecs(jiffies - dmarx->last_jiffies) + > uap->dmarx.poll_timeout) { + + spin_lock_irqsave(&uap->port.lock, flags); + pl011_dma_rx_stop(uap); + uap->im |= UART011_RXIM; + pl011_write(uap->im, uap, REG_IMSC); + spin_unlock_irqrestore(&uap->port.lock, flags); + + uap->dmarx.running = false; + dmaengine_terminate_all(rxchan); + del_timer(&uap->dmarx.timer); + } else { + mod_timer(&uap->dmarx.timer, + jiffies + msecs_to_jiffies(uap->dmarx.poll_rate)); + } +} + +static void pl011_dma_startup(struct uart_amba_port *uap) +{ + int ret; + + if (!uap->dma_probed) + pl011_dma_probe(uap); + + if (!uap->dmatx.chan) + return; + + uap->dmatx.buf = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL | __GFP_DMA); + if (!uap->dmatx.buf) { + dev_err(uap->port.dev, "no memory for DMA TX buffer\n"); + uap->port.fifosize = uap->fifosize; + return; + } + + sg_init_one(&uap->dmatx.sg, uap->dmatx.buf, PL011_DMA_BUFFER_SIZE); + + /* The DMA buffer is now the FIFO the TTY subsystem can use */ + uap->port.fifosize = PL011_DMA_BUFFER_SIZE; + uap->using_tx_dma = true; + + if (!uap->dmarx.chan) + goto skip_rx; + + /* Allocate and map DMA RX buffers */ + ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_a, + DMA_FROM_DEVICE); + if (ret) { + dev_err(uap->port.dev, "failed to init DMA %s: %d\n", + "RX buffer A", ret); + goto skip_rx; + } + + ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_b, + DMA_FROM_DEVICE); + if (ret) { + dev_err(uap->port.dev, "failed to init DMA %s: %d\n", + "RX buffer B", ret); + pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, + DMA_FROM_DEVICE); + goto skip_rx; + } + + uap->using_rx_dma = true; + +skip_rx: + /* Turn on DMA error (RX/TX will be enabled on demand) */ + uap->dmacr |= UART011_DMAONERR; + pl011_write(uap->dmacr, uap, REG_DMACR); + + /* + * ST Micro variants has some specific dma burst threshold + * compensation. Set this to 16 bytes, so burst will only + * be issued above/below 16 bytes. 
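pl011_dma_startup() above degrades gracefully at each step: if the TX bounce buffer cannot be allocated the port keeps its PIO fifosize, and if the second RX buffer fails the first is torn down again so the port runs with TX DMA only. A sketch of that unwinding ladder using plain malloc in place of the kernel DMA allocators; the structure and names are illustrative only:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define DMA_BUF_SIZE 4096

struct toy_uap {
	char *txbuf;
	char *rxbuf_a, *rxbuf_b;
	bool using_tx_dma, using_rx_dma;
	unsigned int fifosize;          /* what the TTY layer is told */
};

static void dma_startup(struct toy_uap *uap, unsigned int hw_fifosize)
{
	uap->fifosize = hw_fifosize;            /* PIO default */

	uap->txbuf = malloc(DMA_BUF_SIZE);
	if (!uap->txbuf)
		return;                         /* stay in PIO mode */
	uap->fifosize = DMA_BUF_SIZE;           /* DMA buffer becomes the "FIFO" */
	uap->using_tx_dma = true;

	uap->rxbuf_a = malloc(DMA_BUF_SIZE);
	if (!uap->rxbuf_a)
		return;                         /* TX DMA only */
	uap->rxbuf_b = malloc(DMA_BUF_SIZE);
	if (!uap->rxbuf_b) {
		free(uap->rxbuf_a);             /* unwind buffer A */
		uap->rxbuf_a = NULL;
		return;
	}
	uap->using_rx_dma = true;
}

int main(void)
{
	struct toy_uap uap = { 0 };

	dma_startup(&uap, 32);
	printf("tx dma %d, rx dma %d, fifosize %u\n",
	       uap.using_tx_dma, uap.using_rx_dma, uap.fifosize);
	return 0;
}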
+ */ + if (uap->vendor->dma_threshold) + pl011_write(ST_UART011_DMAWM_RX_16 | ST_UART011_DMAWM_TX_16, + uap, REG_ST_DMAWM); + + if (uap->using_rx_dma) { + if (pl011_dma_rx_trigger_dma(uap)) + dev_dbg(uap->port.dev, "could not trigger initial " + "RX DMA job, fall back to interrupt mode\n"); + if (uap->dmarx.poll_rate) { + init_timer(&(uap->dmarx.timer)); + uap->dmarx.timer.function = pl011_dma_rx_poll; + uap->dmarx.timer.data = (unsigned long)uap; + mod_timer(&uap->dmarx.timer, + jiffies + + msecs_to_jiffies(uap->dmarx.poll_rate)); + uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE; + uap->dmarx.last_jiffies = jiffies; + } + } +} + +static void pl011_dma_shutdown(struct uart_amba_port *uap) +{ + if (!(uap->using_tx_dma || uap->using_rx_dma)) + return; + + /* Disable RX and TX DMA */ + while (pl011_read(uap, REG_FR) & uap->vendor->fr_busy) + cpu_relax(); + + spin_lock_irq(&uap->port.lock); + uap->dmacr &= ~(UART011_DMAONERR | UART011_RXDMAE | UART011_TXDMAE); + pl011_write(uap->dmacr, uap, REG_DMACR); + spin_unlock_irq(&uap->port.lock); + + if (uap->using_tx_dma) { + /* In theory, this should already be done by pl011_dma_flush_buffer */ + dmaengine_terminate_all(uap->dmatx.chan); + if (uap->dmatx.queued) { + dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, + DMA_TO_DEVICE); + uap->dmatx.queued = false; + } + + kfree(uap->dmatx.buf); + uap->using_tx_dma = false; + } + + if (uap->using_rx_dma) { + dmaengine_terminate_all(uap->dmarx.chan); + /* Clean up the RX DMA */ + pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE); + pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE); + if (uap->dmarx.poll_rate) + del_timer_sync(&uap->dmarx.timer); + uap->using_rx_dma = false; + } +} + +static inline bool pl011_dma_rx_available(struct uart_amba_port *uap) +{ + return uap->using_rx_dma; +} + +static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) +{ + return uap->using_rx_dma && uap->dmarx.running; +} + +#else +/* Blank functions if the DMA engine is not available */ +static inline void pl011_dma_probe(struct uart_amba_port *uap) +{ +} + +static inline void pl011_dma_remove(struct uart_amba_port *uap) +{ +} + +static inline void pl011_dma_startup(struct uart_amba_port *uap) +{ +} + +static inline void pl011_dma_shutdown(struct uart_amba_port *uap) +{ +} + +static inline bool pl011_dma_tx_irq(struct uart_amba_port *uap) +{ + return false; +} + +static inline void pl011_dma_tx_stop(struct uart_amba_port *uap) +{ +} + +static inline bool pl011_dma_tx_start(struct uart_amba_port *uap) +{ + return false; +} + +static inline void pl011_dma_rx_irq(struct uart_amba_port *uap) +{ +} + +static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) +{ +} + +static inline int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) +{ + return -EIO; +} + +static inline bool pl011_dma_rx_available(struct uart_amba_port *uap) +{ + return false; +} + +static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) +{ + return false; +} + +#define pl011_dma_flush_buffer NULL +#endif + +static void pl011_stop_tx(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + uap->im &= ~UART011_TXIM; + pl011_write(uap->im, uap, REG_IMSC); + pl011_dma_tx_stop(uap); +} + +static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq); + +/* Start TX with programmed I/O only (no DMA) */ +static void pl011_start_tx_pio(struct uart_amba_port *uap) +{ + if (pl011_tx_chars(uap, false)) { + uap->im |= 
UART011_TXIM; + pl011_write(uap->im, uap, REG_IMSC); + } +} + +static void pl011_start_tx(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + if (!pl011_dma_tx_start(uap)) + pl011_start_tx_pio(uap); +} + +static void pl011_stop_rx(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + uap->im &= ~(UART011_RXIM|UART011_RTIM|UART011_FEIM| + UART011_PEIM|UART011_BEIM|UART011_OEIM); + pl011_write(uap->im, uap, REG_IMSC); + + pl011_dma_rx_stop(uap); +} + +static void pl011_enable_ms(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + uap->im |= UART011_RIMIM|UART011_CTSMIM|UART011_DCDMIM|UART011_DSRMIM; + pl011_write(uap->im, uap, REG_IMSC); +} + +static void pl011_rx_chars(struct uart_amba_port *uap) +__releases(&uap->port.lock) +__acquires(&uap->port.lock) +{ + pl011_fifo_to_tty(uap); + + spin_unlock(&uap->port.lock); + tty_flip_buffer_push(&uap->port.state->port); + /* + * If we were temporarily out of DMA mode for a while, + * attempt to switch back to DMA mode again. + */ + if (pl011_dma_rx_available(uap)) { + if (pl011_dma_rx_trigger_dma(uap)) { + dev_dbg(uap->port.dev, "could not trigger RX DMA job " + "fall back to interrupt mode again\n"); + uap->im |= UART011_RXIM; + pl011_write(uap->im, uap, REG_IMSC); + } else { +#ifdef CONFIG_DMA_ENGINE + /* Start Rx DMA poll */ + if (uap->dmarx.poll_rate) { + uap->dmarx.last_jiffies = jiffies; + uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE; + mod_timer(&uap->dmarx.timer, + jiffies + + msecs_to_jiffies(uap->dmarx.poll_rate)); + } +#endif + } + } + spin_lock(&uap->port.lock); +} + +static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, + bool from_irq) +{ + if (unlikely(!from_irq) && + pl011_read(uap, REG_FR) & UART01x_FR_TXFF) + return false; /* unable to transmit character */ + + pl011_write(c, uap, REG_DR); + mb(); + uap->port.icount.tx++; + + return true; +} + +/* Returns true if tx interrupts have to be (kept) enabled */ +static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) +{ + struct circ_buf *xmit = &uap->port.state->xmit; + int count = uap->fifosize >> 1; + + if (uap->port.x_char) { + if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) + return true; + uap->port.x_char = 0; + --count; + } + if (uart_circ_empty(xmit) || uart_tx_stopped(&uap->port)) { + pl011_stop_tx(&uap->port); + return false; + } + + /* If we are using DMA mode, try to send some characters. 
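pl011_tx_chars() above budgets each TX interrupt to half a FIFO's worth of characters (which the IFLS trigger level guarantees is free), spends one slot on a pending X-char first, and asks for the TX interrupt to be masked as soon as the ring drains. A toy model of that budgeted loop, transmitting from a plain string instead of the circ_buf:

#include <stdbool.h>
#include <stdio.h>

/* Drain up to fifosize/2 chars from a pending string per "interrupt". */
static bool tx_chars(const char **pending, int *x_char, unsigned int fifosize,
		     bool from_irq)
{
	int count = fifosize >> 1;

	if (*x_char) {
		printf("TX x-char %#x\n", *x_char);
		*x_char = 0;
		--count;
	}
	if (!**pending)
		return false;                   /* nothing left: mask the TX IRQ */

	while (**pending) {
		if (from_irq && count-- == 0)
			break;                  /* budget used up; wait for next IRQ */
		printf("TX '%c'\n", **pending);
		(*pending)++;
	}
	return **pending != '\0';               /* keep the TX IRQ enabled? */
}

int main(void)
{
	const char *msg = "hello, pl011";
	int x_char = 0x13;                      /* pretend an XOFF is queued */

	while (tx_chars(&msg, &x_char, 16, true))
		printf("-- next TX interrupt --\n");
	return 0;
}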
*/ + if (pl011_dma_tx_irq(uap)) + return true; + + do { + if (likely(from_irq) && count-- == 0) + break; + + if (!pl011_tx_char(uap, xmit->buf[xmit->tail], from_irq)) + break; + + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + } while (!uart_circ_empty(xmit)); + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&uap->port); + + if (uart_circ_empty(xmit)) { + pl011_stop_tx(&uap->port); + return false; + } + return true; +} + +static void pl011_modem_status(struct uart_amba_port *uap) +{ + unsigned int status, delta; + + status = pl011_read(uap, REG_FR) & UART01x_FR_MODEM_ANY; + + delta = status ^ uap->old_status; + uap->old_status = status; + + if (!delta) + return; + + if (delta & UART01x_FR_DCD) + uart_handle_dcd_change(&uap->port, status & UART01x_FR_DCD); + + if (delta & uap->vendor->fr_dsr) + uap->port.icount.dsr++; + + if (delta & uap->vendor->fr_cts) + uart_handle_cts_change(&uap->port, + status & uap->vendor->fr_cts); + + wake_up_interruptible(&uap->port.state->port.delta_msr_wait); +} + +static void check_apply_cts_event_workaround(struct uart_amba_port *uap) +{ + unsigned int dummy_read; + + if (!uap->vendor->cts_event_workaround) + return; + + /* workaround to make sure that all bits are unlocked.. */ + pl011_write(0x00, uap, REG_ICR); + + /* + * WA: introduce 26ns(1 uart clk) delay before W1C; + * single apb access will incur 2 pclk(133.12Mhz) delay, + * so add 2 dummy reads + */ + dummy_read = pl011_read(uap, REG_ICR); + dummy_read = pl011_read(uap, REG_ICR); +} + +static irqreturn_t pl011_int(int irq, void *dev_id) +{ + struct uart_amba_port *uap = dev_id; + unsigned long flags; + unsigned int status, pass_counter = AMBA_ISR_PASS_LIMIT; + u16 imsc; + int handled = 0; + + spin_lock_irqsave(&uap->port.lock, flags); + imsc = pl011_read(uap, REG_IMSC); + status = pl011_read(uap, REG_RIS) & imsc; + if (status) { + do { + check_apply_cts_event_workaround(uap); + + pl011_write(status & ~(UART011_TXIS|UART011_RTIS| + UART011_RXIS), + uap, REG_ICR); + + if (status & (UART011_RTIS|UART011_RXIS)) { + if (pl011_dma_rx_running(uap)) + pl011_dma_rx_irq(uap); + else + pl011_rx_chars(uap); + } + if (status & (UART011_DSRMIS|UART011_DCDMIS| + UART011_CTSMIS|UART011_RIMIS)) + pl011_modem_status(uap); + if (status & UART011_TXIS) + pl011_tx_chars(uap, true); + + if (pass_counter-- == 0) + break; + + status = pl011_read(uap, REG_RIS) & imsc; + } while (status != 0); + handled = 1; + } + + spin_unlock_irqrestore(&uap->port.lock, flags); + + return IRQ_RETVAL(handled); +} + +static unsigned int pl011_tx_empty(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + /* Allow feature register bits to be inverted to work around errata */ + unsigned int status = pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr; + + return status & (uap->vendor->fr_busy | UART01x_FR_TXFF) ? 
+ 0 : TIOCSER_TEMT; +} + +static unsigned int pl011_get_mctrl(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + unsigned int result = 0; + unsigned int status = pl011_read(uap, REG_FR); + +#define TIOCMBIT(uartbit, tiocmbit) \ + if (status & uartbit) \ + result |= tiocmbit + + TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR); + TIOCMBIT(uap->vendor->fr_dsr, TIOCM_DSR); + TIOCMBIT(uap->vendor->fr_cts, TIOCM_CTS); + TIOCMBIT(uap->vendor->fr_ri, TIOCM_RNG); +#undef TIOCMBIT + return result; +} + +static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + unsigned int cr; + + cr = pl011_read(uap, REG_CR); + +#define TIOCMBIT(tiocmbit, uartbit) \ + if (mctrl & tiocmbit) \ + cr |= uartbit; \ + else \ + cr &= ~uartbit + + TIOCMBIT(TIOCM_RTS, UART011_CR_RTS); + TIOCMBIT(TIOCM_DTR, UART011_CR_DTR); + TIOCMBIT(TIOCM_OUT1, UART011_CR_OUT1); + TIOCMBIT(TIOCM_OUT2, UART011_CR_OUT2); + TIOCMBIT(TIOCM_LOOP, UART011_CR_LBE); + + if (uap->autorts) { + /* We need to disable auto-RTS if we want to turn RTS off */ + TIOCMBIT(TIOCM_RTS, UART011_CR_RTSEN); + } +#undef TIOCMBIT + + pl011_write(cr, uap, REG_CR); +} + +static void pl011_break_ctl(struct uart_port *port, int break_state) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + unsigned long flags; + unsigned int lcr_h; + + spin_lock_irqsave(&uap->port.lock, flags); + lcr_h = pl011_read(uap, REG_LCRH_TX); + if (break_state == -1) + lcr_h |= UART01x_LCRH_BRK; + else + lcr_h &= ~UART01x_LCRH_BRK; + pl011_write(lcr_h, uap, REG_LCRH_TX); + spin_unlock_irqrestore(&uap->port.lock, flags); +} + +#ifdef CONFIG_CONSOLE_POLL + +static void pl011_quiesce_irqs(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + pl011_write(pl011_read(uap, REG_MIS), uap, REG_ICR); + /* + * There is no way to clear TXIM as this is "ready to transmit IRQ", so + * we simply mask it. start_tx() will unmask it. + * + * Note we can race with start_tx(), and if the race happens, the + * polling user might get another interrupt just after we clear it. + * But it should be OK and can happen even w/o the race, e.g. + * controller immediately got some new data and raised the IRQ. + * + * And whoever uses polling routines assumes that it manages the device + * (including tx queue), so we're also fine with start_tx()'s caller + * side. + */ + pl011_write(pl011_read(uap, REG_IMSC) & ~UART011_TXIM, uap, + REG_IMSC); +} + +static int pl011_get_poll_char(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + unsigned int status; + + /* + * The caller might need IRQs lowered, e.g. if used with KDB NMI + * debugger. + */ + pl011_quiesce_irqs(port); + + status = pl011_read(uap, REG_FR); + if (status & UART01x_FR_RXFE) + return NO_POLL_CHAR; + + return pl011_read(uap, REG_DR); +} + +static void pl011_put_poll_char(struct uart_port *port, + unsigned char ch) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + while (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) + cpu_relax(); + + pl011_write(ch, uap, REG_DR); +} + +#endif /* CONFIG_CONSOLE_POLL */ + +unsigned long pl011_clk_round(unsigned long clk) +{ + unsigned long scaler; + + /* + * If increasing a clock by less than 0.1% changes it + * from ..999.. to ..000.., round up. 
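pl011_get_mctrl() above is a straight translation from flag-register bits to TIOCM_* modem bits, with the DSR, CTS and RI positions supplied by the vendor data so layouts like the ZTE one keep working. A sketch of that translation; every bit value below is an illustrative placeholder rather than the real UART01x_FR_* or TIOCM_* constant:

#include <stdio.h>

#define FR_DCD  (1u << 2)
#define FR_DSR  (1u << 1)
#define FR_CTS  (1u << 0)
#define FR_RI   (1u << 8)

#define TIO_CAR (1u << 6)
#define TIO_DSR (1u << 8)
#define TIO_CTS (1u << 5)
#define TIO_RNG (1u << 7)

struct fr_layout {            /* per-vendor flag-register layout */
	unsigned int dsr, cts, ri;
};

static unsigned int get_mctrl(unsigned int fr, const struct fr_layout *v)
{
	unsigned int result = 0;

	if (fr & FR_DCD)
		result |= TIO_CAR;
	if (fr & v->dsr)
		result |= TIO_DSR;
	if (fr & v->cts)
		result |= TIO_CTS;
	if (fr & v->ri)
		result |= TIO_RNG;
	return result;
}

int main(void)
{
	struct fr_layout std = { .dsr = FR_DSR, .cts = FR_CTS, .ri = FR_RI };

	printf("mctrl = %#x\n", get_mctrl(FR_CTS | FR_DCD, &std));
	return 0;
}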
+ */ + scaler = 1; + while (scaler * 100000 < clk) + scaler *= 10; + if ((clk + scaler - 1)/scaler % 1000 == 0) + clk = (clk/scaler + 1) * scaler; + + return clk; +} + +static int pl011_hwinit(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + int retval; + + /* Optionaly enable pins to be muxed in and configured */ + pinctrl_pm_select_default_state(port->dev); + + /* + * Try to enable the clock producer. + */ + retval = clk_prepare_enable(uap->clk); + if (retval) + return retval; + + uap->port.uartclk = pl011_clk_round(clk_get_rate(uap->clk)); + + /* Clear pending error and receive interrupts */ + pl011_write(UART011_OEIS | UART011_BEIS | UART011_PEIS | + UART011_FEIS | UART011_RTIS | UART011_RXIS, + uap, REG_ICR); + + /* + * Save interrupts enable mask, and enable RX interrupts in case if + * the interrupt is used for NMI entry. + */ + uap->im = pl011_read(uap, REG_IMSC); + pl011_write(UART011_RTIM | UART011_RXIM, uap, REG_IMSC); + + if (dev_get_platdata(uap->port.dev)) { + struct amba_pl011_data *plat; + + plat = dev_get_platdata(uap->port.dev); + if (plat->init) + plat->init(); + } + return 0; +} + +static bool pl011_split_lcrh(const struct uart_amba_port *uap) +{ + return pl011_reg_to_offset(uap, REG_LCRH_RX) != + pl011_reg_to_offset(uap, REG_LCRH_TX); +} + +static void pl011_write_lcr_h(struct uart_amba_port *uap, unsigned int lcr_h) +{ + pl011_write(lcr_h, uap, REG_LCRH_RX); + if (pl011_split_lcrh(uap)) { + int i; + /* + * Wait 10 PCLKs before writing LCRH_TX register, + * to get this delay write read only register 10 times + */ + for (i = 0; i < 10; ++i) + pl011_write(0xff, uap, REG_MIS); + pl011_write(lcr_h, uap, REG_LCRH_TX); + } +} + +static int pl011_allocate_irq(struct uart_amba_port *uap) +{ + pl011_write(uap->im, uap, REG_IMSC); + + return request_irq(uap->port.irq, pl011_int, 0, "uart-pl011", uap); +} + +/* + * Enable interrupts, only timeouts when using DMA + * if initial RX DMA job failed, start in interrupt mode + * as well. 
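pl011_clk_round() above compensates for clock drivers that report a rate a hair under a round figure, such as 26,999,895 Hz instead of 27 MHz: it scales the rate down to a handful of digits and, when the rate rounded up to the next scaler step would end in three zeroes, bumps the rate up to that step. A standalone copy of the arithmetic with two worked inputs:

#include <stdio.h>

static unsigned long clk_round(unsigned long clk)
{
	unsigned long scaler = 1;

	while (scaler * 100000 < clk)
		scaler *= 10;
	if ((clk + scaler - 1) / scaler % 1000 == 0)
		clk = (clk / scaler + 1) * scaler;

	return clk;
}

int main(void)
{
	/* just below the round 27 MHz: bumped up to exactly 27,000,000 */
	printf("%lu -> %lu\n", 26999895ul, clk_round(26999895ul));
	/* not near a roll-over: left alone */
	printf("%lu -> %lu\n", 24576000ul, clk_round(24576000ul));
	return 0;
}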
+ */ +static void pl011_enable_interrupts(struct uart_amba_port *uap) +{ + spin_lock_irq(&uap->port.lock); + + /* Clear out any spuriously appearing RX interrupts */ + pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); + uap->im = UART011_RTIM; + if (!pl011_dma_rx_running(uap)) + uap->im |= UART011_RXIM; + pl011_write(uap->im, uap, REG_IMSC); + spin_unlock_irq(&uap->port.lock); +} + +static int pl011_startup(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + unsigned int cr; + int retval; + + retval = pl011_hwinit(port); + if (retval) + goto clk_dis; + + retval = pl011_allocate_irq(uap); + if (retval) + goto clk_dis; + + pl011_write(uap->vendor->ifls, uap, REG_IFLS); + + spin_lock_irq(&uap->port.lock); + + /* restore RTS and DTR */ + cr = uap->old_cr & (UART011_CR_RTS | UART011_CR_DTR); + cr |= UART01x_CR_UARTEN | UART011_CR_RXE | UART011_CR_TXE; + pl011_write(cr, uap, REG_CR); + + spin_unlock_irq(&uap->port.lock); + + /* + * initialise the old status of the modem signals + */ + uap->old_status = pl011_read(uap, REG_FR) & UART01x_FR_MODEM_ANY; + + /* Startup DMA */ + pl011_dma_startup(uap); + + pl011_enable_interrupts(uap); + + return 0; + + clk_dis: + clk_disable_unprepare(uap->clk); + return retval; +} + +static int sbsa_uart_startup(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + int retval; + + retval = pl011_hwinit(port); + if (retval) + return retval; + + retval = pl011_allocate_irq(uap); + if (retval) + return retval; + + /* The SBSA UART does not support any modem status lines. */ + uap->old_status = 0; + + pl011_enable_interrupts(uap); + + return 0; +} + +static void pl011_shutdown_channel(struct uart_amba_port *uap, + unsigned int lcrh) +{ + unsigned long val; + + val = pl011_read(uap, lcrh); + val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN); + pl011_write(val, uap, lcrh); +} + +/* + * disable the port. It should not disable RTS and DTR. + * Also RTS and DTR state should be preserved to restore + * it during startup(). 
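pl011_startup() above rebuilds the control register instead of writing a fixed value: only the RTS and DTR bits survive from the value saved at shutdown, and the enable bits are ORed back in, so the modem lines do not glitch across a close/open cycle. A short sketch of that mask-and-merge; the CR_* values are placeholders for the UART011_CR_* constants:

#include <stdio.h>

#define CR_UARTEN (1u << 0)
#define CR_TXE    (1u << 8)
#define CR_RXE    (1u << 9)
#define CR_DTR    (1u << 10)
#define CR_RTS    (1u << 11)

static unsigned int startup_cr(unsigned int old_cr)
{
	unsigned int cr = old_cr & (CR_RTS | CR_DTR);  /* preserve modem lines */

	return cr | CR_UARTEN | CR_RXE | CR_TXE;       /* (re)enable the port  */
}

int main(void)
{
	unsigned int old_cr = CR_RTS | CR_UARTEN;       /* as left by shutdown  */

	printf("new CR = %#x\n", startup_cr(old_cr));
	return 0;
}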
+ */ +static void pl011_disable_uart(struct uart_amba_port *uap) +{ + unsigned int cr; + + uap->autorts = false; + spin_lock_irq(&uap->port.lock); + cr = pl011_read(uap, REG_CR); + uap->old_cr = cr; + cr &= UART011_CR_RTS | UART011_CR_DTR; + cr |= UART01x_CR_UARTEN | UART011_CR_TXE; + pl011_write(cr, uap, REG_CR); + spin_unlock_irq(&uap->port.lock); + + /* + * disable break condition and fifos + */ + pl011_shutdown_channel(uap, REG_LCRH_RX); + if (pl011_split_lcrh(uap)) + pl011_shutdown_channel(uap, REG_LCRH_TX); +} + +static void pl011_disable_interrupts(struct uart_amba_port *uap) +{ + spin_lock_irq(&uap->port.lock); + + /* mask all interrupts and clear all pending ones */ + uap->im = 0; + pl011_write(uap->im, uap, REG_IMSC); + pl011_write(0xffff, uap, REG_ICR); + + spin_unlock_irq(&uap->port.lock); +} + +static void pl011_shutdown(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + pl011_disable_interrupts(uap); + + pl011_dma_shutdown(uap); + + free_irq(uap->port.irq, uap); + + pl011_disable_uart(uap); + + /* + * Shut down the clock producer + */ + clk_disable_unprepare(uap->clk); + /* Optionally let pins go into sleep states */ + pinctrl_pm_select_sleep_state(port->dev); + + if (dev_get_platdata(uap->port.dev)) { + struct amba_pl011_data *plat; + + plat = dev_get_platdata(uap->port.dev); + if (plat->exit) + plat->exit(); + } + + if (uap->port.ops->flush_buffer) + uap->port.ops->flush_buffer(port); +} + +static void sbsa_uart_shutdown(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + pl011_disable_interrupts(uap); + + free_irq(uap->port.irq, uap); + + if (uap->port.ops->flush_buffer) + uap->port.ops->flush_buffer(port); +} + +static void +pl011_setup_status_masks(struct uart_port *port, struct ktermios *termios) +{ + port->read_status_mask = UART011_DR_OE | 255; + if (termios->c_iflag & INPCK) + port->read_status_mask |= UART011_DR_FE | UART011_DR_PE; + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) + port->read_status_mask |= UART011_DR_BE; + + /* + * Characters to ignore + */ + port->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= UART011_DR_FE | UART011_DR_PE; + if (termios->c_iflag & IGNBRK) { + port->ignore_status_mask |= UART011_DR_BE; + /* + * If we're ignoring parity and break indicators, + * ignore overruns too (for real raw support). + */ + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= UART011_DR_OE; + } + + /* + * Ignore all characters if CREAD is not set. + */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= UART_DUMMY_DR_RX; +} + +static void +pl011_set_termios(struct uart_port *port, struct ktermios *termios, + struct ktermios *old) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + unsigned int lcr_h, old_cr; + unsigned long flags; + unsigned int baud, quot, clkdiv; + + if (uap->vendor->oversampling) + clkdiv = 8; + else + clkdiv = 16; + + /* + * Ask the core to calculate the divisor for us. + */ + baud = uart_get_baud_rate(port, termios, old, 0, + port->uartclk / clkdiv); +#ifdef CONFIG_DMA_ENGINE + /* + * Adjust RX DMA polling rate with baud rate if not specified. 
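pl011_set_termios() above asks the core for a baud rate capped at uartclk/16 (or uartclk/8 on the ST oversampling variants) and then, in the code that follows, converts it to the PL011's integer-plus-sixty-fourths divisor: uartclk * 4 / baud in 1/64ths for 16x oversampling, with the high bits going to IBRD and the low six to FBRD. A standalone version of that arithmetic with one worked example:

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

struct brd { unsigned int ibrd, fbrd; };

static struct brd pl011_divisor(unsigned int uartclk, unsigned int baud,
				int oversampling)
{
	unsigned int quot;   /* divider in 64ths */
	struct brd b;

	if (oversampling && baud > uartclk / 16)
		quot = DIV_ROUND_CLOSEST(uartclk * 8, baud);  /* 8x oversampling */
	else
		quot = DIV_ROUND_CLOSEST(uartclk * 4, baud);  /* 16x oversampling */

	b.ibrd = quot >> 6;          /* integer part  -> REG_IBRD */
	b.fbrd = quot & 0x3f;        /* 64ths         -> REG_FBRD */
	return b;
}

int main(void)
{
	/* 24 MHz UART clock, 115200 baud: divider = 24e6/(16*115200) = 13.02 */
	struct brd b = pl011_divisor(24000000, 115200, 0);

	printf("IBRD=%u FBRD=%u\n", b.ibrd, b.fbrd);   /* IBRD=13 FBRD=1 */
	return 0;
}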
+ */ + if (uap->dmarx.auto_poll_rate) + uap->dmarx.poll_rate = DIV_ROUND_UP(10000000, baud); +#endif + + if (baud > port->uartclk/16) + quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud); + else + quot = DIV_ROUND_CLOSEST(port->uartclk * 4, baud); + + switch (termios->c_cflag & CSIZE) { + case CS5: + lcr_h = UART01x_LCRH_WLEN_5; + break; + case CS6: + lcr_h = UART01x_LCRH_WLEN_6; + break; + case CS7: + lcr_h = UART01x_LCRH_WLEN_7; + break; + default: // CS8 + lcr_h = UART01x_LCRH_WLEN_8; + break; + } + if (termios->c_cflag & CSTOPB) + lcr_h |= UART01x_LCRH_STP2; + if (termios->c_cflag & PARENB) { + lcr_h |= UART01x_LCRH_PEN; + if (!(termios->c_cflag & PARODD)) + lcr_h |= UART01x_LCRH_EPS; + if (termios->c_cflag & CMSPAR) + lcr_h |= UART011_LCRH_SPS; + } + if (uap->fifosize > 1) + lcr_h |= UART01x_LCRH_FEN; + + spin_lock_irqsave(&port->lock, flags); + + /* + * Update the per-port timeout. + */ + uart_update_timeout(port, termios->c_cflag, baud); + + pl011_setup_status_masks(port, termios); + + if (UART_ENABLE_MS(port, termios->c_cflag)) + pl011_enable_ms(port); + + /* first, disable everything */ + old_cr = pl011_read(uap, REG_CR); + pl011_write(0, uap, REG_CR); + + if (termios->c_cflag & CRTSCTS) { + if (old_cr & UART011_CR_RTS) + old_cr |= UART011_CR_RTSEN; + + old_cr |= UART011_CR_CTSEN; + uap->autorts = true; + port->status |= UPSTAT_AUTOCTS; + } else { + old_cr &= ~(UART011_CR_CTSEN | UART011_CR_RTSEN); + uap->autorts = false; + port->status &= ~UPSTAT_AUTOCTS; + } + + if (uap->vendor->oversampling) { + if (baud > port->uartclk / 16) + old_cr |= ST_UART011_CR_OVSFACT; + else + old_cr &= ~ST_UART011_CR_OVSFACT; + } + + /* + * Workaround for the ST Micro oversampling variants to + * increase the bitrate slightly, by lowering the divisor, + * to avoid delayed sampling of start bit at high speeds, + * else we see data corruption. + */ + if (uap->vendor->oversampling) { + if ((baud >= 3000000) && (baud < 3250000) && (quot > 1)) + quot -= 1; + else if ((baud > 3250000) && (quot > 2)) + quot -= 2; + } + /* Set baud rate */ + pl011_write(quot & 0x3f, uap, REG_FBRD); + pl011_write(quot >> 6, uap, REG_IBRD); + + /* + * ----------v----------v----------v----------v----- + * NOTE: REG_LCRH_TX and REG_LCRH_RX MUST BE WRITTEN AFTER + * REG_FBRD & REG_IBRD. + * ----------^----------^----------^----------^----- + */ + pl011_write_lcr_h(uap, lcr_h); + pl011_write(old_cr, uap, REG_CR); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static void +sbsa_uart_set_termios(struct uart_port *port, struct ktermios *termios, + struct ktermios *old) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + unsigned long flags; + + tty_termios_encode_baud_rate(termios, uap->fixed_baud, uap->fixed_baud); + + /* The SBSA UART only supports 8n1 without hardware flow control. */ + termios->c_cflag &= ~(CSIZE | CSTOPB | PARENB | PARODD); + termios->c_cflag &= ~(CMSPAR | CRTSCTS); + termios->c_cflag |= CS8 | CLOCAL; + + spin_lock_irqsave(&port->lock, flags); + uart_update_timeout(port, CS8, uap->fixed_baud); + pl011_setup_status_masks(port, termios); + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *pl011_type(struct uart_port *port) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + return uap->port.type == PORT_AMBA ? 
uap->type : NULL; +} + +/* + * Release the memory region(s) being used by 'port' + */ +static void pl011_release_port(struct uart_port *port) +{ + release_mem_region(port->mapbase, SZ_4K); +} + +/* + * Request the memory region(s) being used by 'port' + */ +static int pl011_request_port(struct uart_port *port) +{ + return request_mem_region(port->mapbase, SZ_4K, "uart-pl011") + != NULL ? 0 : -EBUSY; +} + +/* + * Configure/autoconfigure the port. + */ +static void pl011_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) { + port->type = PORT_AMBA; + pl011_request_port(port); + } +} + +/* + * verify the new serial_struct (for TIOCSSERIAL). + */ +static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + int ret = 0; + if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA) + ret = -EINVAL; + if (ser->irq < 0 || ser->irq >= nr_irqs) + ret = -EINVAL; + if (ser->baud_base < 9600) + ret = -EINVAL; + return ret; +} + +static const struct uart_ops amba_pl011_pops = { + .tx_empty = pl011_tx_empty, + .set_mctrl = pl011_set_mctrl, + .get_mctrl = pl011_get_mctrl, + .stop_tx = pl011_stop_tx, + .start_tx = pl011_start_tx, + .stop_rx = pl011_stop_rx, + .enable_ms = pl011_enable_ms, + .break_ctl = pl011_break_ctl, + .startup = pl011_startup, + .shutdown = pl011_shutdown, + .flush_buffer = pl011_dma_flush_buffer, + .set_termios = pl011_set_termios, + .type = pl011_type, + .release_port = pl011_release_port, + .request_port = pl011_request_port, + .config_port = pl011_config_port, + .verify_port = pl011_verify_port, +#ifdef CONFIG_CONSOLE_POLL + .poll_init = pl011_hwinit, + .poll_get_char = pl011_get_poll_char, + .poll_put_char = pl011_put_poll_char, +#endif +}; + +static void sbsa_uart_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ +} + +static unsigned int sbsa_uart_get_mctrl(struct uart_port *port) +{ + return 0; +} + +static const struct uart_ops sbsa_uart_pops = { + .tx_empty = pl011_tx_empty, + .set_mctrl = sbsa_uart_set_mctrl, + .get_mctrl = sbsa_uart_get_mctrl, + .stop_tx = pl011_stop_tx, + .start_tx = pl011_start_tx, + .stop_rx = pl011_stop_rx, + .startup = sbsa_uart_startup, + .shutdown = sbsa_uart_shutdown, + .set_termios = sbsa_uart_set_termios, + .type = pl011_type, + .release_port = pl011_release_port, + .request_port = pl011_request_port, + .config_port = pl011_config_port, + .verify_port = pl011_verify_port, +#ifdef CONFIG_CONSOLE_POLL + .poll_init = pl011_hwinit, + .poll_get_char = pl011_get_poll_char, + .poll_put_char = pl011_put_poll_char, +#endif +}; + +static struct uart_amba_port *amba_ports[UART_NR]; + +#ifdef CONFIG_SERIAL_AMBA_PL011_CONSOLE + +static void pl011_console_putchar(struct uart_port *port, int ch) +{ + struct uart_amba_port *uap = + container_of(port, struct uart_amba_port, port); + + while (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) + cpu_relax(); + pl011_write(ch, uap, REG_DR); +} + +static void +pl011_console_write(struct console *co, const char *s, unsigned int count) +{ + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr = 0, new_cr; + unsigned long flags; + int locked = 1; + + clk_enable(uap->clk); + + local_irq_save(flags); + if (uap->port.sysrq) + locked = 0; + else if (oops_in_progress) + locked = spin_trylock(&uap->port.lock); + else + spin_lock(&uap->port.lock); + + /* + * First save the CR then disable the interrupts + */ + if (!uap->vendor->always_enabled) { + old_cr = pl011_read(uap, REG_CR); + new_cr = old_cr & ~UART011_CR_CTSEN; + new_cr |= UART01x_CR_UARTEN | 
UART011_CR_TXE; + pl011_write(new_cr, uap, REG_CR); + } + + uart_console_write(&uap->port, s, count, pl011_console_putchar); + + /* + * Finally, wait for transmitter to become empty and restore the + * TCR. Allow feature register bits to be inverted to work around + * errata. + */ + while ((pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr) + & uap->vendor->fr_busy) + cpu_relax(); + if (!uap->vendor->always_enabled) + pl011_write(old_cr, uap, REG_CR); + + if (locked) + spin_unlock(&uap->port.lock); + local_irq_restore(flags); + + clk_disable(uap->clk); +} + +static void __init +pl011_console_get_options(struct uart_amba_port *uap, int *baud, + int *parity, int *bits) +{ + if (pl011_read(uap, REG_CR) & UART01x_CR_UARTEN) { + unsigned int lcr_h, ibrd, fbrd; + + lcr_h = pl011_read(uap, REG_LCRH_TX); + + *parity = 'n'; + if (lcr_h & UART01x_LCRH_PEN) { + if (lcr_h & UART01x_LCRH_EPS) + *parity = 'e'; + else + *parity = 'o'; + } + + if ((lcr_h & 0x60) == UART01x_LCRH_WLEN_7) + *bits = 7; + else + *bits = 8; + + ibrd = pl011_read(uap, REG_IBRD); + fbrd = pl011_read(uap, REG_FBRD); + + *baud = uap->port.uartclk * 4 / (64 * ibrd + fbrd); + + if (uap->vendor->oversampling) { + if (pl011_read(uap, REG_CR) + & ST_UART011_CR_OVSFACT) + *baud *= 2; + } + } +} + +static int __init pl011_console_setup(struct console *co, char *options) +{ + struct uart_amba_port *uap; + int baud = 38400; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + int ret; + + /* + * Check whether an invalid uart number has been specified, and + * if so, search for the first available port that does have + * console support. + */ + if (co->index >= UART_NR) + co->index = 0; + uap = amba_ports[co->index]; + if (!uap) + return -ENODEV; + + /* Allow pins to be muxed in and configured */ + pinctrl_pm_select_default_state(uap->port.dev); + + ret = clk_prepare(uap->clk); + if (ret) + return ret; + + if (dev_get_platdata(uap->port.dev)) { + struct amba_pl011_data *plat; + + plat = dev_get_platdata(uap->port.dev); + if (plat->init) + plat->init(); + } + + uap->port.uartclk = pl011_clk_round(clk_get_rate(uap->clk)); + + if (uap->vendor->fixed_options) { + baud = uap->fixed_baud; + } else { + if (options) + uart_parse_options(options, + &baud, &parity, &bits, &flow); + else + pl011_console_get_options(uap, &baud, &parity, &bits); + } + + return uart_set_options(&uap->port, co, baud, parity, bits, flow); +} + +/** + * pl011_console_match - non-standard console matching + * @co: registering console + * @name: name from console command line + * @idx: index from console command line + * @options: ptr to option string from console command line + * + * Only attempts to match console command lines of the form: + * console=pl011,mmio|mmio32,[,] + * console=pl011,0x[,] + * This form is used to register an initial earlycon boot console and + * replace it with the amba_console at pl011 driver init. + * + * Performs console setup for a match (as required by interface) + * If no are specified, then assume the h/w is already setup. + * + * Returns 0 if console matches; otherwise non-zero to use default matching + */ +static int __init pl011_console_match(struct console *co, char *name, int idx, + char *options) +{ + unsigned char iotype; + resource_size_t addr; + int i; + + /* + * Systems affected by the Qualcomm Technologies QDF2400 E44 erratum + * have a distinct console name, so make sure we check for that. + * The actual implementation of the erratum occurs in the probe + * function. 
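pl011_console_get_options() above recovers whatever rate the boot firmware programmed by inverting the divisor maths: the divider in sixty-fourths is 64 * IBRD + FBRD, so baud = uartclk * 4 / (64 * IBRD + FBRD), doubled again when the ST oversampling factor is set. A worked standalone version; the recovered value is approximate (115246 for the example below, i.e. effectively 115200):

#include <stdio.h>

static unsigned int recover_baud(unsigned int uartclk, unsigned int ibrd,
				 unsigned int fbrd, int ovsfact)
{
	/* divider (in 64ths) = 64*ibrd + fbrd; baud = clk / (16 * divider/64) */
	unsigned int baud = uartclk * 4 / (64 * ibrd + fbrd);

	if (ovsfact)            /* 8x oversampling doubles the effective rate */
		baud *= 2;
	return baud;
}

int main(void)
{
	/* The values a 24 MHz port would carry after being set up for 115200. */
	printf("baud = %u\n", recover_baud(24000000, 13, 1, 0));
	return 0;
}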
+ */ + if ((strcmp(name, "qdf2400_e44") != 0) && (strcmp(name, "pl011") != 0)) + return -ENODEV; + + if (uart_parse_earlycon(options, &iotype, &addr, &options)) + return -ENODEV; + + if (iotype != UPIO_MEM && iotype != UPIO_MEM32) + return -ENODEV; + + /* try to match the port specified on the command line */ + for (i = 0; i < ARRAY_SIZE(amba_ports); i++) { + struct uart_port *port; + + if (!amba_ports[i]) + continue; + + port = &amba_ports[i]->port; + + if (port->mapbase != addr) + continue; + + co->index = i; + port->cons = co; + return pl011_console_setup(co, options); + } + + return -ENODEV; +} + +static struct uart_driver amba_reg; +static struct console amba_console = { + .name = "ttyAMA", + .write = pl011_console_write, + .device = uart_console_device, + .setup = pl011_console_setup, + .match = pl011_console_match, + .flags = CON_PRINTBUFFER | CON_ANYTIME, + .index = -1, + .data = &amba_reg, +}; + +#define AMBA_CONSOLE (&amba_console) + +static void qdf2400_e44_putc(struct uart_port *port, int c) +{ + while (readl(port->membase + UART01x_FR) & UART01x_FR_TXFF) + cpu_relax(); + writel(c, port->membase + UART01x_DR); + while (!(readl(port->membase + UART01x_FR) & UART011_FR_TXFE)) + cpu_relax(); +} + +static void qdf2400_e44_early_write(struct console *con, const char *s, unsigned n) +{ + struct earlycon_device *dev = con->data; + + uart_console_write(&dev->port, s, n, qdf2400_e44_putc); +} + +static void pl011_putc(struct uart_port *port, int c) +{ + while (readl(port->membase + UART01x_FR) & UART01x_FR_TXFF) + cpu_relax(); + if (port->iotype == UPIO_MEM32) + writel(c, port->membase + UART01x_DR); + else + writeb(c, port->membase + UART01x_DR); + while (readl(port->membase + UART01x_FR) & UART01x_FR_BUSY) + cpu_relax(); +} + +static void pl011_early_write(struct console *con, const char *s, unsigned n) +{ + struct earlycon_device *dev = con->data; + + uart_console_write(&dev->port, s, n, pl011_putc); +} + +/* + * On non-ACPI systems, earlycon is enabled by specifying + * "earlycon=pl011,
" on the kernel command line. + * + * On ACPI ARM64 systems, an "early" console is enabled via the SPCR table, + * by specifying only "earlycon" on the command line. Because it requires + * SPCR, the console starts after ACPI is parsed, which is later than a + * traditional early console. + * + * To get the traditional early console that starts before ACPI is parsed, + * specify the full "earlycon=pl011,
" option. + */ +static int __init pl011_early_console_setup(struct earlycon_device *device, + const char *opt) +{ + if (!device->port.membase) + return -ENODEV; + + device->con->write = pl011_early_write; + + return 0; +} +OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); +OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup); + +/* + * On Qualcomm Datacenter Technologies QDF2400 SOCs affected by + * Erratum 44, traditional earlycon can be enabled by specifying + * "earlycon=qdf2400_e44,
". Any options are ignored. + * + * Alternatively, you can just specify "earlycon", and the early console + * will be enabled with the information from the SPCR table. In this + * case, the SPCR code will detect the need for the E44 work-around, + * and set the console name to "qdf2400_e44". + */ +static int __init +qdf2400_e44_early_console_setup(struct earlycon_device *device, + const char *opt) +{ + if (!device->port.membase) + return -ENODEV; + + device->con->write = qdf2400_e44_early_write; + return 0; +} +EARLYCON_DECLARE(qdf2400_e44, qdf2400_e44_early_console_setup); + +#else +#define AMBA_CONSOLE NULL +#endif + +static struct uart_driver amba_reg = { + .owner = THIS_MODULE, + .driver_name = "ttyAMA", + .dev_name = "ttyAMA", + .major = SERIAL_AMBA_MAJOR, + .minor = SERIAL_AMBA_MINOR, + .nr = UART_NR, + .cons = AMBA_CONSOLE, +}; + +#if 0 +static int pl011_probe_dt_alias(int index, struct device *dev) +{ + struct device_node *np; + static bool seen_dev_with_alias = false; + static bool seen_dev_without_alias = false; + int ret = index; + + if (!IS_ENABLED(CONFIG_OF)) + return ret; + + np = dev->of_node; + if (!np) + return ret; + + ret = of_alias_get_id(np, "serial"); + if (ret < 0) { + seen_dev_without_alias = true; + ret = index; + } else { + seen_dev_with_alias = true; + if (ret >= ARRAY_SIZE(amba_ports) || amba_ports[ret] != NULL) { + dev_warn(dev, "requested serial port %d not available.\n", ret); + ret = index; + } + } + + if (seen_dev_with_alias && seen_dev_without_alias) + dev_warn(dev, "aliased and non-aliased serial devices found in device tree. Serial port enumeration may be unpredictable.\n"); + + return ret; +} +#endif + +/* unregisters the driver also if no more ports are left */ +static void pl011_unregister_port(struct uart_amba_port *uap) +{ + int i; + bool busy = false; + + for (i = 0; i < ARRAY_SIZE(amba_ports); i++) { + if (amba_ports[i] == uap) + amba_ports[i] = NULL; + else if (amba_ports[i]) + busy = true; + } + pl011_dma_remove(uap); + if (!busy) + uart_unregister_driver(&amba_reg); +} + +static int pl011_find_free_port(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(amba_ports); i++) + if (amba_ports[i] == NULL) + return i; + + return -EBUSY; +} + +static int pl011_setup_port(struct device *dev, struct uart_amba_port *uap, + struct resource *mmiobase, int index) +{ + void __iomem *base; + + base = devm_ioremap_resource(dev, mmiobase); + if (IS_ERR(base)) + return PTR_ERR(base); + + /* Don't use DT serial aliases - it causes the device to + be renumbered to ttyAMA1 if it is the second serial port in the + system, even though the other one is ttyS0. The 8250 driver + doesn't use this logic, so always remains ttyS0. 
+ index = pl011_probe_dt_alias(index, dev); + */ + + uap->old_cr = 0; + uap->port.dev = dev; + uap->port.mapbase = mmiobase->start; + uap->port.membase = base; + uap->port.fifosize = uap->fifosize; + uap->port.flags = UPF_BOOT_AUTOCONF; + uap->port.line = index; + + amba_ports[index] = uap; + + return 0; +} + +static int pl011_register_port(struct uart_amba_port *uap) +{ + int ret; + + /* Ensure interrupts from this UART are masked and cleared */ + pl011_write(0, uap, REG_IMSC); + pl011_write(0xffff, uap, REG_ICR); + + if (!amba_reg.state) { + ret = uart_register_driver(&amba_reg); + if (ret < 0) { + dev_err(uap->port.dev, + "Failed to register AMBA-PL011 driver\n"); + return ret; + } + } + + ret = uart_add_one_port(&amba_reg, &uap->port); + if (ret) + pl011_unregister_port(uap); + + return ret; +} + +static int pl011_probe(struct amba_device *dev, const struct amba_id *id) +{ + struct uart_amba_port *uap; + struct vendor_data *vendor = id->data; + int portnr, ret; + + portnr = pl011_find_free_port(); + if (portnr < 0) + return portnr; + + uap = devm_kzalloc(&dev->dev, sizeof(struct uart_amba_port), + GFP_KERNEL); + if (!uap) + return -ENOMEM; + + uap->clk = devm_clk_get(&dev->dev, NULL); + if (IS_ERR(uap->clk)) + return PTR_ERR(uap->clk); + + if (of_property_read_bool(dev->dev.of_node, "cts-event-workaround")) { + vendor->cts_event_workaround = true; + dev_info(&dev->dev, "cts_event_workaround enabled\n"); + } + + uap->reg_offset = vendor->reg_offset; + uap->vendor = vendor; + uap->fifosize = vendor->get_fifosize(dev); + uap->port.iotype = vendor->access_32b ? UPIO_MEM32 : UPIO_MEM; + uap->port.irq = dev->irq[0]; + uap->port.ops = &amba_pl011_pops; + + snprintf(uap->type, sizeof(uap->type), "PL011 rev%u", amba_rev(dev)); + + ret = pl011_setup_port(&dev->dev, uap, &dev->res, portnr); + if (ret) + return ret; + + amba_set_drvdata(dev, uap); + + return pl011_register_port(uap); +} + +static int pl011_remove(struct amba_device *dev) +{ + struct uart_amba_port *uap = amba_get_drvdata(dev); + + uart_remove_one_port(&amba_reg, &uap->port); + pl011_unregister_port(uap); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int pl011_suspend(struct device *dev) +{ + struct uart_amba_port *uap = dev_get_drvdata(dev); + + if (!uap) + return -EINVAL; + + return uart_suspend_port(&amba_reg, &uap->port); +} + +static int pl011_resume(struct device *dev) +{ + struct uart_amba_port *uap = dev_get_drvdata(dev); + + if (!uap) + return -EINVAL; + + return uart_resume_port(&amba_reg, &uap->port); +} +#endif + +static SIMPLE_DEV_PM_OPS(pl011_dev_pm_ops, pl011_suspend, pl011_resume); + +static int sbsa_uart_probe(struct platform_device *pdev) +{ + struct uart_amba_port *uap; + struct resource *r; + int portnr, ret; + int baudrate; + + /* + * Check the mandatory baud rate parameter in the DT node early + * so that we can easily exit with the error. 
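The port bookkeeping above (pl011_find_free_port(), pl011_register_port(), pl011_unregister_port()) registers the uart driver lazily when the first port shows up and only unregisters it once the last slot is empty. A compact user-space model of that pattern, with purely illustrative names:

#include <stdbool.h>
#include <stdio.h>

#define NPORTS 4

static void *slots[NPORTS];   /* stands in for amba_ports[] */
static bool core_registered;  /* stands in for amba_reg.state */

static int slot_add(void *port)
{
        for (int i = 0; i < NPORTS; i++) {
                if (!slots[i]) {
                        if (!core_registered)
                                core_registered = true; /* lazy uart_register_driver() */
                        slots[i] = port;
                        return i;
                }
        }
        return -1; /* no free slot: -EBUSY */
}

static void slot_remove(void *port)
{
        bool busy = false;

        for (int i = 0; i < NPORTS; i++) {
                if (slots[i] == port)
                        slots[i] = NULL;
                else if (slots[i])
                        busy = true;
        }
        if (!busy)
                core_registered = false; /* last port gone: uart_unregister_driver() */
}

int main(void)
{
        int port_a, port_b;

        slot_add(&port_a);
        slot_add(&port_b);
        slot_remove(&port_a);
        printf("core registered: %d\n", core_registered); /* 1: port_b remains */
        slot_remove(&port_b);
        printf("core registered: %d\n", core_registered); /* 0 */
        return 0;
}
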
+ */ + if (pdev->dev.of_node) { + struct device_node *np = pdev->dev.of_node; + + ret = of_property_read_u32(np, "current-speed", &baudrate); + if (ret) + return ret; + } else { + baudrate = 115200; + } + + portnr = pl011_find_free_port(); + if (portnr < 0) + return portnr; + + uap = devm_kzalloc(&pdev->dev, sizeof(struct uart_amba_port), + GFP_KERNEL); + if (!uap) + return -ENOMEM; + + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "cannot obtain irq\n"); + return ret; + } + uap->port.irq = ret; + +#ifdef CONFIG_ACPI_SPCR_TABLE + if (qdf2400_e44_present) { + dev_info(&pdev->dev, "working around QDF2400 SoC erratum 44\n"); + uap->vendor = &vendor_qdt_qdf2400_e44; + } else +#endif + uap->vendor = &vendor_sbsa; + + uap->reg_offset = uap->vendor->reg_offset; + uap->fifosize = 32; + uap->port.iotype = uap->vendor->access_32b ? UPIO_MEM32 : UPIO_MEM; + uap->port.ops = &sbsa_uart_pops; + uap->fixed_baud = baudrate; + + snprintf(uap->type, sizeof(uap->type), "SBSA"); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + ret = pl011_setup_port(&pdev->dev, uap, r, portnr); + if (ret) + return ret; + + platform_set_drvdata(pdev, uap); + + return pl011_register_port(uap); +} + +static int sbsa_uart_remove(struct platform_device *pdev) +{ + struct uart_amba_port *uap = platform_get_drvdata(pdev); + + uart_remove_one_port(&amba_reg, &uap->port); + pl011_unregister_port(uap); + return 0; +} + +static const struct of_device_id sbsa_uart_of_match[] = { + { .compatible = "arm,sbsa-uart", }, + {}, +}; +MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); + +static const struct acpi_device_id sbsa_uart_acpi_match[] = { + { "ARMH0011", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match); + +static struct platform_driver arm_sbsa_uart_platform_driver = { + .probe = sbsa_uart_probe, + .remove = sbsa_uart_remove, + .driver = { + .name = "sbsa-uart", + .of_match_table = of_match_ptr(sbsa_uart_of_match), + .acpi_match_table = ACPI_PTR(sbsa_uart_acpi_match), + }, +}; + +static const struct amba_id pl011_ids[] = { + { + .id = 0x00041011, + .mask = 0x000fffff, + .data = &vendor_arm, + }, + { + .id = 0x00380802, + .mask = 0x00ffffff, + .data = &vendor_st, + }, + { + .id = AMBA_LINUX_ID(0x00, 0x1, 0xffe), + .mask = 0x00ffffff, + .data = &vendor_zte, + }, + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl011_ids); + +static struct amba_driver pl011_driver = { + .drv = { + .name = "uart-pl011", + .pm = &pl011_dev_pm_ops, + }, + .id_table = pl011_ids, + .probe = pl011_probe, + .remove = pl011_remove, +}; + +static int __init pl011_init(void) +{ + printk(KERN_INFO "Serial: AMBA PL011 UART driver\n"); + + if (platform_driver_register(&arm_sbsa_uart_platform_driver)) + pr_warn("could not register SBSA UART platform driver\n"); + return amba_driver_register(&pl011_driver); +} + +static void __exit pl011_exit(void) +{ + platform_driver_unregister(&arm_sbsa_uart_platform_driver); + amba_driver_unregister(&pl011_driver); +} + +/* + * While this can be a module, if builtin it's most likely the console + * So let's leave module_exit but move module_init to an earlier place + */ +arch_initcall(pl011_init); +module_exit(pl011_exit); + +MODULE_AUTHOR("ARM Ltd/Deep Blue Solutions Ltd"); +MODULE_DESCRIPTION("ARM AMBA serial port driver"); +MODULE_LICENSE("GPL"); diff -ruN linux-org/drivers/tty/serial/xilinx_uartps.c linux/drivers/tty/serial/xilinx_uartps.c --- linux-org/drivers/tty/serial/xilinx_uartps.c 2022-03-25 09:55:41.533454838 +0100 +++ 
linux/drivers/tty/serial/xilinx_uartps.c 2022-03-25 10:15:23.353003010 +0100 @@ -1257,6 +1257,34 @@ spin_unlock_irqrestore(&port->lock, flags); } +#ifdef CONFIG_RAW_PRINTK + +static void cdns_uart_console_write_raw(struct console *co, const char *s, + unsigned int count) +{ + struct uart_port *port = &cdns_uart_port[co->index]; + unsigned int imr, ctrl; + + imr = readl(port->membase + CDNS_UART_IMR); + writel(imr, port->membase + CDNS_UART_IDR); + + ctrl = readl(port->membase + CDNS_UART_CR); + ctrl &= ~CDNS_UART_CR_TX_DIS; + ctrl |= CDNS_UART_CR_TX_EN; + writel(ctrl, port->membase + CDNS_UART_CR); + + while (count-- > 0) { + if (*s == '\n') + writel('\r', port->membase + CDNS_UART_FIFO); + writel(*s++, port->membase + CDNS_UART_FIFO); + } + + writel(ctrl, port->membase + CDNS_UART_CR); + writel(imr, port->membase + CDNS_UART_IER); +} + +#endif + /** * cdns_uart_console_setup - Initialize the uart to default config * @co: Console handle @@ -1294,7 +1322,12 @@ .write = cdns_uart_console_write, .device = uart_console_device, .setup = cdns_uart_console_setup, +#ifdef CONFIG_RAW_PRINTK + .write_raw = cdns_uart_console_write_raw, + .flags = CON_PRINTBUFFER | CON_RAW, +#else .flags = CON_PRINTBUFFER, +#endif .index = -1, /* Specified on the cmdline (e.g. console=ttyPS ) */ .data = &cdns_uart_uart_driver, }; diff -ruN linux-org/fs/exec.c linux/fs/exec.c --- linux-org/fs/exec.c 2022-03-25 09:55:41.973453180 +0100 +++ linux/fs/exec.c 2022-03-25 10:15:23.353003010 +0100 @@ -1003,6 +1003,7 @@ { struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; + unsigned long flags; /* Notify parent that we're no longer interested in the old VM */ tsk = current; @@ -1026,8 +1027,10 @@ task_lock(tsk); active_mm = tsk->active_mm; tsk->mm = mm; + ipipe_mm_switch_protect(flags); tsk->active_mm = mm; activate_mm(active_mm, mm); + ipipe_mm_switch_unprotect(flags); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); diff -ruN linux-org/include/asm-generic/atomic.h linux/include/asm-generic/atomic.h --- linux-org/include/asm-generic/atomic.h 2022-03-25 09:55:42.269452065 +0100 +++ linux/include/asm-generic/atomic.h 2022-03-25 10:15:23.353003010 +0100 @@ -82,9 +82,9 @@ { \ unsigned long flags; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ v->counter = v->counter c_op i; \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ } #define ATOMIC_OP_RETURN(op, c_op) \ @@ -93,9 +93,9 @@ unsigned long flags; \ int ret; \ \ - raw_local_irq_save(flags); \ + flags = hard_local_irq_save(); \ ret = (v->counter = v->counter c_op i); \ - raw_local_irq_restore(flags); \ + hard_local_irq_restore(flags); \ \ return ret; \ } diff -ruN linux-org/include/asm-generic/bitops/atomic.h linux/include/asm-generic/bitops/atomic.h --- linux-org/include/asm-generic/bitops/atomic.h 2022-03-25 09:55:42.269452065 +0100 +++ linux/include/asm-generic/bitops/atomic.h 2022-03-25 10:15:23.353003010 +0100 @@ -22,20 +22,20 @@ * this is the substitute */ #define _atomic_spin_lock_irqsave(l,f) do { \ arch_spinlock_t *s = ATOMIC_HASH(l); \ - local_irq_save(f); \ + (f) = hard_local_irq_save(); \ arch_spin_lock(s); \ } while(0) #define _atomic_spin_unlock_irqrestore(l,f) do { \ arch_spinlock_t *s = ATOMIC_HASH(l); \ arch_spin_unlock(s); \ - local_irq_restore(f); \ + hard_local_irq_restore(f); \ } while(0) #else -# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0) -# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0) +# define 
_atomic_spin_lock_irqsave(l,f) do { (f) = hard_local_irq_save(); } while (0) +# define _atomic_spin_unlock_irqrestore(l,f) do { hard_local_irq_restore(f); } while (0) #endif /* diff -ruN linux-org/include/asm-generic/cmpxchg-local.h linux/include/asm-generic/cmpxchg-local.h --- linux-org/include/asm-generic/cmpxchg-local.h 2022-03-25 09:55:42.269452065 +0100 +++ linux/include/asm-generic/cmpxchg-local.h 2022-03-25 10:15:23.353003010 +0100 @@ -23,7 +23,7 @@ if (size == 8 && sizeof(unsigned long) != 8) wrong_size_cmpxchg(ptr); - raw_local_irq_save(flags); + flags = hard_local_irq_save(); switch (size) { case 1: prev = *(u8 *)ptr; if (prev == old) @@ -44,7 +44,7 @@ default: wrong_size_cmpxchg(ptr); } - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return prev; } @@ -57,11 +57,11 @@ u64 prev; unsigned long flags; - raw_local_irq_save(flags); + flags = hard_local_irq_save(); prev = *(u64 *)ptr; if (prev == old) *(u64 *)ptr = new; - raw_local_irq_restore(flags); + hard_local_irq_restore(flags); return prev; } diff -ruN linux-org/include/asm-generic/ipipe.h linux/include/asm-generic/ipipe.h --- linux-org/include/asm-generic/ipipe.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/asm-generic/ipipe.h 2022-03-25 10:15:23.353003010 +0100 @@ -0,0 +1,72 @@ +/* -*- linux-c -*- + * include/asm-generic/ipipe.h + * + * Copyright (C) 2002-2017 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + */ +#ifndef __ASM_GENERIC_IPIPE_H +#define __ASM_GENERIC_IPIPE_H + +#ifdef CONFIG_IPIPE + +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \ + defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT) +void __ipipe_uaccess_might_fault(void); +#else +#define __ipipe_uaccess_might_fault() might_fault() +#endif + +#define hard_cond_local_irq_enable() hard_local_irq_enable() +#define hard_cond_local_irq_disable() hard_local_irq_disable() +#define hard_cond_local_irq_save() hard_local_irq_save() +#define hard_cond_local_irq_restore(flags) hard_local_irq_restore(flags) + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT +void ipipe_root_only(void); +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ +static inline void ipipe_root_only(void) { } +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +void ipipe_stall_root(void); + +void ipipe_unstall_root(void); + +unsigned long ipipe_test_and_stall_root(void); + +unsigned long ipipe_test_root(void); + +void ipipe_restore_root(unsigned long x); + +#else /* !CONFIG_IPIPE */ + +#define hard_local_irq_save() arch_local_irq_save() +#define hard_local_irq_restore(x) arch_local_irq_restore(x) +#define hard_local_irq_enable() arch_local_irq_enable() +#define hard_local_irq_disable() arch_local_irq_disable() +#define hard_irqs_disabled() irqs_disabled() + +#define hard_cond_local_irq_enable() do { } while(0) +#define hard_cond_local_irq_disable() do { } while(0) +#define hard_cond_local_irq_save() 0 +#define hard_cond_local_irq_restore(flags) do { (void)(flags); } while(0) + +#define __ipipe_uaccess_might_fault() might_fault() + +static inline void ipipe_root_only(void) { } + +#endif /* !CONFIG_IPIPE */ + +#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) +#define hard_smp_local_irq_save() hard_local_irq_save() +#define hard_smp_local_irq_restore(flags) hard_local_irq_restore(flags) 
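The hard_*() fallbacks above are the crux of the pipeline model: without CONFIG_IPIPE they collapse to the ordinary arch_local_irq_*() helpers, but with the pipeline enabled local_irq_save() on the root domain only sets a virtual stall bit, whereas hard_local_irq_save() really masks the CPU. That is why the generic atomic, bitops and per-CPU helpers in this patch are moved to the hard variants. A toy user-space model of the stall/log/replay behaviour (a sketch, not the patch's implementation; __ipipe_set_irq_pending() and __ipipe_sync_stage(), declared elsewhere in this patch, are the real counterparts):

#include <stdbool.h>
#include <stdio.h>

static bool hard_masked;       /* hard_local_irq_save()/restore() */
static bool root_stalled;      /* local_irq_save() on the root domain */
static unsigned long pending;  /* per-domain pending-IRQ log */

static void handle(int irq) { printf("handled irq %d\n", irq); }

static void irq_arrives(int irq)
{
        if (hard_masked)
                return;                 /* delivery blocked at CPU level */
        if (root_stalled)
                pending |= 1UL << irq;  /* logged, like __ipipe_set_irq_pending() */
        else
                handle(irq);
}

static void root_unstall(void)
{
        root_stalled = false;
        while (pending) {               /* replay, like __ipipe_sync_stage() */
                int irq = __builtin_ctzl(pending);

                pending &= ~(1UL << irq);
                handle(irq);
        }
}

int main(void)
{
        root_stalled = true;   /* local_irq_save(): virtual masking only */
        irq_arrives(3);        /* not lost, just deferred */
        root_unstall();        /* local_irq_restore(): replayed here */
        return 0;
}
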
+#else /* !CONFIG_SMP */ +#define hard_smp_local_irq_save() 0 +#define hard_smp_local_irq_restore(flags) do { (void)(flags); } while(0) +#endif /* CONFIG_SMP */ + +#endif diff -ruN linux-org/include/asm-generic/percpu.h linux/include/asm-generic/percpu.h --- linux-org/include/asm-generic/percpu.h 2022-03-25 09:55:42.273452050 +0100 +++ linux/include/asm-generic/percpu.h 2022-03-25 10:15:23.353003010 +0100 @@ -44,11 +44,33 @@ #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) #endif +#ifdef CONFIG_IPIPE +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) +extern int __ipipe_check_percpu_access(void); +#define __ipipe_cpu_offset \ + ({ \ + WARN_ON_ONCE(__ipipe_check_percpu_access()); \ + __my_cpu_offset; \ + }) +#else +#define __ipipe_cpu_offset __my_cpu_offset +#endif +#ifndef __ipipe_raw_cpu_ptr +#define __ipipe_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __ipipe_cpu_offset) +#endif +#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var))) +#endif /* CONFIG_IPIPE */ + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -#endif /* SMP */ +#else /* !SMP */ + +#define __ipipe_raw_cpu_ptr(ptr) VERIFY_PERCPU_PTR(ptr) +#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var))) + +#endif /* !SMP */ #ifndef PER_CPU_BASE_SECTION #ifdef CONFIG_SMP @@ -148,9 +170,9 @@ #define this_cpu_generic_to_op(pcp, val, op) \ do { \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ raw_cpu_generic_to_op(pcp, val, op); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ } while (0) @@ -158,9 +180,9 @@ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_add_return(pcp, val); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -168,9 +190,9 @@ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_xchg(pcp, nval); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -178,9 +200,9 @@ ({ \ typeof(pcp) __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) @@ -188,10 +210,10 @@ ({ \ int __ret; \ unsigned long __flags; \ - raw_local_irq_save(__flags); \ + __flags = hard_local_irq_save(); \ __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(__flags); \ + hard_local_irq_restore(__flags); \ __ret; \ }) diff -ruN linux-org/include/asm-generic/switch_to.h linux/include/asm-generic/switch_to.h --- linux-org/include/asm-generic/switch_to.h 2022-03-25 09:55:42.273452050 +0100 +++ linux/include/asm-generic/switch_to.h 2022-03-25 10:15:23.353003010 +0100 @@ -21,10 +21,17 @@ */ extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); - +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH #define switch_to(prev, next, last) \ do { \ + hard_cond_local_irq_disable(); \ ((last) = __switch_to((prev), (next))); \ + hard_cond_local_irq_enable(); \ } while (0) - +#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ +#define switch_to(prev, next, last) \ + do { \ + ((last) = __switch_to((prev), (next))); \ + } while (0) +#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ #endif /* 
__ASM_GENERIC_SWITCH_TO_H */ diff -ruN linux-org/include/clocksource/timer-sp804.h linux/include/clocksource/timer-sp804.h --- linux-org/include/clocksource/timer-sp804.h 2022-03-25 09:55:42.273452050 +0100 +++ linux/include/clocksource/timer-sp804.h 2022-03-25 10:15:23.353003010 +0100 @@ -5,20 +5,23 @@ struct clk; int __sp804_clocksource_and_sched_clock_init(void __iomem *, + unsigned long phys, const char *, struct clk *, int); int __sp804_clockevents_init(void __iomem *, unsigned int, struct clk *, const char *); void sp804_timer_disable(void __iomem *); -static inline void sp804_clocksource_init(void __iomem *base, const char *name) +static inline void sp804_clocksource_init(void __iomem *base, unsigned long phys, + const char *name) { - __sp804_clocksource_and_sched_clock_init(base, name, NULL, 0); + __sp804_clocksource_and_sched_clock_init(base, phys, name, NULL, 0); } static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base, + unsigned long phys, const char *name) { - __sp804_clocksource_and_sched_clock_init(base, name, NULL, 1); + __sp804_clocksource_and_sched_clock_init(base, phys, name, NULL, 1); } static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name) diff -ruN linux-org/include/ipipe/setup.h linux/include/ipipe/setup.h --- linux-org/include/ipipe/setup.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/ipipe/setup.h 2022-03-25 10:15:23.353003010 +0100 @@ -0,0 +1,10 @@ +#ifndef _IPIPE_SETUP_H +#define _IPIPE_SETUP_H + +/* + * Placeholders for setup hooks defined by client domains. + */ + +static inline void __ipipe_early_client_setup(void) { } + +#endif /* !_IPIPE_SETUP_H */ diff -ruN linux-org/include/ipipe/thread_info.h linux/include/ipipe/thread_info.h --- linux-org/include/ipipe/thread_info.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/ipipe/thread_info.h 2022-03-25 10:15:23.353003010 +0100 @@ -0,0 +1,14 @@ +#ifndef _IPIPE_THREAD_INFO_H +#define _IPIPE_THREAD_INFO_H + +/* + * Placeholder for private thread information defined by client + * domains. 
+ */ + +struct ipipe_threadinfo { +}; + +#define __ipipe_init_threadinfo(__p) do { } while (0) + +#endif /* !_IPIPE_THREAD_INFO_H */ diff -ruN linux-org/include/linux/clockchips.h linux/include/linux/clockchips.h --- linux-org/include/linux/clockchips.h 2022-03-24 17:11:41.887105586 +0100 +++ linux/include/linux/clockchips.h 2022-03-25 10:15:23.353003010 +0100 @@ -129,6 +129,15 @@ const struct cpumask *cpumask; struct list_head list; struct module *owner; + +#ifdef CONFIG_IPIPE + struct ipipe_timer *ipipe_timer; + unsigned ipipe_stolen; + +#define clockevent_ipipe_stolen(evt) ((evt)->ipipe_stolen) +#else +#define clockevent_ipipe_stolen(evt) (0) +#endif /* !CONFIG_IPIPE */ } ____cacheline_aligned; /* Helpers to verify state of a clockevent device */ diff -ruN linux-org/include/linux/clocksource.h linux/include/linux/clocksource.h --- linux-org/include/linux/clocksource.h 2022-03-25 09:55:42.321451869 +0100 +++ linux/include/linux/clocksource.h 2022-03-25 10:15:23.353003010 +0100 @@ -107,6 +107,9 @@ u64 wd_last; #endif struct module *owner; +#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE + u64 (*ipipe_read)(struct clocksource *cs); +#endif /* CONFIG_IPIPE_WANT_CLOCKSOURCE */ }; /* diff -ruN linux-org/include/linux/console.h linux/include/linux/console.h --- linux-org/include/linux/console.h 2022-03-25 09:55:42.321451869 +0100 +++ linux/include/linux/console.h 2022-03-25 10:15:23.353003010 +0100 @@ -133,10 +133,12 @@ #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ #define CON_BRL (32) /* Used for a braille device */ #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ +#define CON_RAW (128) /* Supports raw write mode */ struct console { char name[16]; void (*write)(struct console *, const char *, unsigned); + void (*write_raw)(struct console *, const char *, unsigned); int (*read)(struct console *, char *, unsigned); struct tty_driver *(*device)(struct console *, int *); void (*unblank)(void); diff -ruN linux-org/include/linux/dw_apb_timer.h linux/include/linux/dw_apb_timer.h --- linux-org/include/linux/dw_apb_timer.h 2022-03-25 09:55:42.329451839 +0100 +++ linux/include/linux/dw_apb_timer.h 2022-03-25 10:15:23.353003010 +0100 @@ -35,6 +35,7 @@ struct dw_apb_clocksource { struct dw_apb_timer timer; struct clocksource cs; + unsigned long phys; }; void dw_apb_clockevent_register(struct dw_apb_clock_event_device *dw_ced); @@ -47,7 +48,7 @@ void __iomem *base, int irq, unsigned long freq); struct dw_apb_clocksource * dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, - unsigned long freq); + unsigned long phys, unsigned long freq); void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs); void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs); u64 dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs); diff -ruN linux-org/include/linux/ftrace.h linux/include/linux/ftrace.h --- linux-org/include/linux/ftrace.h 2022-03-25 09:55:42.337451809 +0100 +++ linux/include/linux/ftrace.h 2022-03-25 10:15:23.353003010 +0100 @@ -141,6 +141,7 @@ FTRACE_OPS_FL_PID = 1 << 14, FTRACE_OPS_FL_RCU = 1 << 15, FTRACE_OPS_FL_TRACE_ARRAY = 1 << 16, + FTRACE_OPS_FL_IPIPE_EXCLUSIVE = 1 << 17, }; #ifdef CONFIG_DYNAMIC_FTRACE diff -ruN linux-org/include/linux/gpio/driver.h linux/include/linux/gpio/driver.h --- linux-org/include/linux/gpio/driver.h 2022-03-25 09:55:42.341451794 +0100 +++ linux/include/linux/gpio/driver.h 2022-03-25 10:15:23.353003010 +0100 @@ -154,7 +154,7 @@ void __iomem *reg_clr; void __iomem *reg_dir; int bgpio_bits; - 
spinlock_t bgpio_lock; + ipipe_spinlock_t bgpio_lock; unsigned long bgpio_data; unsigned long bgpio_dir; #endif diff -ruN linux-org/include/linux/hardirq.h linux/include/linux/hardirq.h --- linux-org/include/linux/hardirq.h 2022-03-25 09:55:42.341451794 +0100 +++ linux/include/linux/hardirq.h 2022-03-25 10:15:23.353003010 +0100 @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -62,6 +63,7 @@ #define nmi_enter() \ do { \ + __ipipe_nmi_enter(); \ printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ @@ -80,6 +82,7 @@ ftrace_nmi_exit(); \ lockdep_on(); \ printk_nmi_exit(); \ + __ipipe_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff -ruN linux-org/include/linux/ipipe_base.h linux/include/linux/ipipe_base.h --- linux-org/include/linux/ipipe_base.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/linux/ipipe_base.h 2022-03-25 10:15:23.353003010 +0100 @@ -0,0 +1,243 @@ +/* -*- linux-c -*- + * include/linux/ipipe_base.h + * + * Copyright (C) 2002-2014 Philippe Gerum. + * 2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __LINUX_IPIPE_BASE_H +#define __LINUX_IPIPE_BASE_H + +struct kvm_vcpu; +struct ipipe_vm_notifier; +struct irq_desc; + +#ifdef CONFIG_IPIPE + +#define IPIPE_CORE_APIREV CONFIG_IPIPE_CORE_APIREV + +#include +#include +#include +#include + +struct pt_regs; +struct ipipe_domain; + +struct ipipe_trap_data { + int exception; + struct pt_regs *regs; +}; + +struct ipipe_vm_notifier { + void (*handler)(struct ipipe_vm_notifier *nfy); +}; + +static inline int ipipe_virtual_irq_p(unsigned int irq) +{ + return irq >= IPIPE_VIRQ_BASE && irq < IPIPE_NR_IRQS; +} + +void __ipipe_init_early(void); + +void __ipipe_init(void); + +#ifdef CONFIG_PROC_FS +void __ipipe_init_proc(void); +#ifdef CONFIG_IPIPE_TRACE +void __ipipe_init_tracer(void); +#else /* !CONFIG_IPIPE_TRACE */ +static inline void __ipipe_init_tracer(void) { } +#endif /* CONFIG_IPIPE_TRACE */ +#else /* !CONFIG_PROC_FS */ +static inline void __ipipe_init_proc(void) { } +#endif /* CONFIG_PROC_FS */ + +void __ipipe_restore_root_nosync(unsigned long x); + +#define IPIPE_IRQF_NOACK 0x1 +#define IPIPE_IRQF_NOSYNC 0x2 + +void __ipipe_dispatch_irq(unsigned int irq, int flags); + +void __ipipe_do_sync_stage(void); + +void __ipipe_do_sync_pipeline(struct ipipe_domain *top); + +void __ipipe_lock_irq(unsigned int irq); + +void __ipipe_unlock_irq(unsigned int irq); + +void __ipipe_do_critical_sync(unsigned int irq, void *cookie); + +void __ipipe_ack_edge_irq(struct irq_desc *desc); + +void __ipipe_nop_irq(struct irq_desc *desc); + +static inline void __ipipe_idle(void) +{ + ipipe_unstall_root(); +} + +#ifndef __ipipe_sync_check +#define __ipipe_sync_check 1 +#endif + +static inline void __ipipe_sync_stage(void) +{ + if (likely(__ipipe_sync_check)) + __ipipe_do_sync_stage(); +} + +#ifndef __ipipe_run_irqtail +#define __ipipe_run_irqtail(irq) do { } while(0) +#endif + +int __ipipe_log_printk(const char *fmt, va_list args); +void __ipipe_flush_printk(unsigned int irq, void *cookie); + +#define __ipipe_get_cpu(flags) ({ (flags) = hard_preempt_disable(); ipipe_processor_id(); }) +#define __ipipe_put_cpu(flags) hard_preempt_enable(flags) + +int __ipipe_notify_syscall(struct pt_regs *regs); + +int __ipipe_notify_trap(int exception, struct pt_regs *regs); + +int __ipipe_notify_kevent(int event, void *data); + +#define __ipipe_report_trap(exception, regs) \ + __ipipe_notify_trap(exception, regs) + +#define __ipipe_report_sigwake(p) \ + do { \ + if (ipipe_notifier_enabled_p(p)) \ + __ipipe_notify_kevent(IPIPE_KEVT_SIGWAKE, p); \ + } while (0) + +struct ipipe_cpu_migration_data { + struct task_struct *task; + int dest_cpu; +}; + +#define __ipipe_report_setaffinity(__p, __dest_cpu) \ + do { \ + struct ipipe_cpu_migration_data d = { \ + .task = (__p), \ + .dest_cpu = (__dest_cpu), \ + }; \ + if (ipipe_notifier_enabled_p(__p)) \ + __ipipe_notify_kevent(IPIPE_KEVT_SETAFFINITY, &d); \ + } while (0) + +#define __ipipe_report_exit(p) \ + do { \ + if (ipipe_notifier_enabled_p(p)) \ + __ipipe_notify_kevent(IPIPE_KEVT_EXIT, p); \ + } while (0) + +#define __ipipe_report_setsched(p) \ + do { \ + if (ipipe_notifier_enabled_p(p)) \ + __ipipe_notify_kevent(IPIPE_KEVT_SETSCHED, p); \ + } while (0) + +#define __ipipe_report_schedule(prev, next) \ +do { \ + if (ipipe_notifier_enabled_p(next) || \ + ipipe_notifier_enabled_p(prev)) { \ + __this_cpu_write(ipipe_percpu.rqlock_owner, prev); \ + __ipipe_notify_kevent(IPIPE_KEVT_SCHEDULE, next); \ + } \ +} while (0) + +#define __ipipe_report_cleanup(mm) \ + __ipipe_notify_kevent(IPIPE_KEVT_CLEANUP, mm) + +#define 
__ipipe_report_clockfreq_update(freq) \ + __ipipe_notify_kevent(IPIPE_KEVT_CLOCKFREQ, &(freq)) + +void __ipipe_notify_vm_preemption(void); + +void __ipipe_call_mayday(struct pt_regs *regs); + +static inline void __ipipe_init_taskinfo(struct task_struct *p) { } + +#define __ipipe_serial_debug(__fmt, __args...) raw_printk(__fmt, ##__args) + +#else /* !CONFIG_IPIPE */ + +struct task_struct; +struct mm_struct; + +static inline void __ipipe_init_early(void) { } + +static inline void __ipipe_init(void) { } + +static inline void __ipipe_init_proc(void) { } + +static inline void __ipipe_idle(void) { } + +static inline void __ipipe_report_sigwake(struct task_struct *p) { } + +static inline void __ipipe_report_setaffinity(struct task_struct *p, + int dest_cpu) { } + +static inline void __ipipe_report_setsched(struct task_struct *p) { } + +static inline void __ipipe_report_exit(struct task_struct *p) { } + +static inline void __ipipe_report_cleanup(struct mm_struct *mm) { } + +#define __ipipe_report_trap(exception, regs) 0 + +static inline void __ipipe_init_taskinfo(struct task_struct *p) { } + +#define hard_preempt_disable() ({ preempt_disable(); 0; }) +#define hard_preempt_enable(flags) ({ preempt_enable(); (void)(flags); }) + +#define __ipipe_get_cpu(flags) ({ (void)(flags); get_cpu(); }) +#define __ipipe_put_cpu(flags) \ + do { \ + (void)(flags); \ + put_cpu(); \ + } while (0) + +#define __ipipe_root_tick_p(regs) 1 + +#define ipipe_handle_demuxed_irq(irq) generic_handle_irq(irq) + +#define __ipipe_enter_vm(vmf) do { } while (0) + +static inline void __ipipe_exit_vm(void) { } + +static inline void __ipipe_notify_vm_preemption(void) { } + +#define __ipipe_serial_debug(__fmt, __args...) do { } while (0) + +#endif /* !CONFIG_IPIPE */ + +#ifdef CONFIG_IPIPE_WANT_PTE_PINNING +void __ipipe_pin_mapping_globally(unsigned long start, + unsigned long end); +#else +static inline void __ipipe_pin_mapping_globally(unsigned long start, + unsigned long end) +{ } +#endif + +#endif /* !__LINUX_IPIPE_BASE_H */ diff -ruN linux-org/include/linux/ipipe_debug.h linux/include/linux/ipipe_debug.h --- linux-org/include/linux/ipipe_debug.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/linux/ipipe_debug.h 2022-03-25 10:15:23.353003010 +0100 @@ -0,0 +1,100 @@ +/* -*- linux-c -*- + * include/linux/ipipe_debug.h + * + * Copyright (C) 2012 Philippe Gerum . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __LINUX_IPIPE_DEBUG_H +#define __LINUX_IPIPE_DEBUG_H + +#include + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + +#include + +static inline int ipipe_disable_context_check(void) +{ + return xchg(raw_cpu_ptr(&ipipe_percpu.context_check), 0); +} + +static inline void ipipe_restore_context_check(int old_state) +{ + __this_cpu_write(ipipe_percpu.context_check, old_state); +} + +static inline void ipipe_context_check_off(void) +{ + int cpu; + for_each_online_cpu(cpu) + per_cpu(ipipe_percpu, cpu).context_check = 0; +} + +static inline void ipipe_save_context_nmi(void) +{ + int state = ipipe_disable_context_check(); + __this_cpu_write(ipipe_percpu.context_check_saved, state); +} + +static inline void ipipe_restore_context_nmi(void) +{ + ipipe_restore_context_check(__this_cpu_read(ipipe_percpu.context_check_saved)); +} + +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +static inline int ipipe_disable_context_check(void) +{ + return 0; +} + +static inline void ipipe_restore_context_check(int old_state) { } + +static inline void ipipe_context_check_off(void) { } + +static inline void ipipe_save_context_nmi(void) { } + +static inline void ipipe_restore_context_nmi(void) { } + +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +#ifdef CONFIG_IPIPE_DEBUG + +#define ipipe_check_irqoff() \ + do { \ + if (WARN_ON_ONCE(!hard_irqs_disabled())) \ + hard_local_irq_disable(); \ + } while (0) + +#else /* !CONFIG_IPIPE_DEBUG */ + +static inline void ipipe_check_irqoff(void) { } + +#endif /* !CONFIG_IPIPE_DEBUG */ + +#ifdef CONFIG_IPIPE_DEBUG_INTERNAL +#define IPIPE_WARN(c) WARN_ON(c) +#define IPIPE_WARN_ONCE(c) WARN_ON_ONCE(c) +#define IPIPE_BUG_ON(c) BUG_ON(c) +#else +#define IPIPE_WARN(c) do { (void)(c); } while (0) +#define IPIPE_WARN_ONCE(c) do { (void)(c); } while (0) +#define IPIPE_BUG_ON(c) do { (void)(c); } while (0) +#endif + +#endif /* !__LINUX_IPIPE_DEBUG_H */ diff -ruN linux-org/include/linux/ipipe_domain.h linux/include/linux/ipipe_domain.h --- linux-org/include/linux/ipipe_domain.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/linux/ipipe_domain.h 2022-03-25 10:15:23.353003010 +0100 @@ -0,0 +1,357 @@ +/* -*- linux-c -*- + * include/linux/ipipe_domain.h + * + * Copyright (C) 2007-2012 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __LINUX_IPIPE_DOMAIN_H +#define __LINUX_IPIPE_DOMAIN_H + +#ifdef CONFIG_IPIPE + +#include +#include +#include +#include + +struct task_struct; +struct mm_struct; +struct irq_desc; +struct ipipe_vm_notifier; + +#define __bpl_up(x) (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1)) +/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */ +#define IPIPE_NR_VIRQS BITS_PER_LONG +/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */ +#define IPIPE_VIRQ_BASE __bpl_up(IPIPE_NR_XIRQS) +/* Total number of IRQ slots */ +#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS) + +#define IPIPE_IRQ_LOMAPSZ (IPIPE_NR_IRQS / BITS_PER_LONG) +#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG +/* + * We need a 3-level mapping. This allows us to handle up to 32k IRQ + * vectors on 32bit machines, 256k on 64bit ones. + */ +#define __IPIPE_3LEVEL_IRQMAP 1 +#define IPIPE_IRQ_MDMAPSZ (__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG) +#else +/* + * 2-level mapping is enough. This allows us to handle up to 1024 IRQ + * vectors on 32bit machines, 4096 on 64bit ones. + */ +#define __IPIPE_2LEVEL_IRQMAP 1 +#endif + +/* Per-cpu pipeline status */ +#define IPIPE_STALL_FLAG 0 /* interrupts (virtually) disabled. */ +#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) + +/* Interrupt control bits */ +#define IPIPE_HANDLE_FLAG 0 +#define IPIPE_STICKY_FLAG 1 +#define IPIPE_LOCK_FLAG 2 +#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) +#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) +#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) + +#define __IPIPE_SYSCALL_P 0 +#define __IPIPE_TRAP_P 1 +#define __IPIPE_KEVENT_P 2 +#define __IPIPE_SYSCALL_E (1 << __IPIPE_SYSCALL_P) +#define __IPIPE_TRAP_E (1 << __IPIPE_TRAP_P) +#define __IPIPE_KEVENT_E (1 << __IPIPE_KEVENT_P) +#define __IPIPE_ALL_E 0x7 +#define __IPIPE_SYSCALL_R (8 << __IPIPE_SYSCALL_P) +#define __IPIPE_TRAP_R (8 << __IPIPE_TRAP_P) +#define __IPIPE_KEVENT_R (8 << __IPIPE_KEVENT_P) +#define __IPIPE_SHIFT_R 3 +#define __IPIPE_ALL_R (__IPIPE_ALL_E << __IPIPE_SHIFT_R) + +#define IPIPE_KEVT_SCHEDULE 0 +#define IPIPE_KEVT_SIGWAKE 1 +#define IPIPE_KEVT_SETSCHED 2 +#define IPIPE_KEVT_SETAFFINITY 3 +#define IPIPE_KEVT_EXIT 4 +#define IPIPE_KEVT_CLEANUP 5 +#define IPIPE_KEVT_HOSTRT 6 +#define IPIPE_KEVT_CLOCKFREQ 7 + +typedef void (*ipipe_irq_ackfn_t)(struct irq_desc *desc); + +typedef void (*ipipe_irq_handler_t)(unsigned int irq, + void *cookie); + +struct ipipe_domain { + int context_offset; + struct ipipe_irqdesc { + unsigned long control; + ipipe_irq_ackfn_t ackfn; + ipipe_irq_handler_t handler; + void *cookie; + } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; + const char *name; + struct mutex mutex; +}; + +static inline void * +__ipipe_irq_cookie(struct ipipe_domain *ipd, unsigned int irq) +{ + return ipd->irqs[irq].cookie; +} + +static inline ipipe_irq_handler_t +__ipipe_irq_handler(struct ipipe_domain *ipd, unsigned int irq) +{ + return ipd->irqs[irq].handler; +} + +extern struct ipipe_domain ipipe_root; + +#define ipipe_root_domain (&ipipe_root) + +extern struct ipipe_domain *ipipe_head_domain; + +struct ipipe_percpu_domain_data { + unsigned long status; /* <= Must be first in struct. 
*/ + unsigned long irqpend_himap; +#ifdef __IPIPE_3LEVEL_IRQMAP + unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ]; +#endif + unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ]; + unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ]; + unsigned long irqall[IPIPE_NR_IRQS]; + struct ipipe_domain *domain; + int coflags; +}; + +struct ipipe_percpu_data { + struct ipipe_percpu_domain_data root; + struct ipipe_percpu_domain_data head; + struct ipipe_percpu_domain_data *curr; + struct pt_regs tick_regs; + int hrtimer_irq; + struct task_struct *task_hijacked; + struct task_struct *rqlock_owner; + struct ipipe_vm_notifier *vm_notifier; + unsigned long nmi_state; + struct mm_struct *active_mm; +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + int context_check; + int context_check_saved; +#endif +}; + +/* + * CAREFUL: all accessors based on __ipipe_raw_cpu_ptr() you may find + * in this file should be used only while hw interrupts are off, to + * prevent from CPU migration regardless of the running domain. + */ +DECLARE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu); + +static inline struct ipipe_percpu_domain_data * +__context_of(struct ipipe_percpu_data *p, struct ipipe_domain *ipd) +{ + return (void *)p + ipd->context_offset; +} + +/** + * ipipe_percpu_context - return the address of the pipeline context + * data for a domain on a given CPU. + * + * NOTE: this is the slowest accessor, use it carefully. Prefer + * ipipe_this_cpu_context() for requests targeted at the current + * CPU. Additionally, if the target domain is known at build time, + * consider ipipe_this_cpu_{root, head}_context(). + */ +static inline struct ipipe_percpu_domain_data * +ipipe_percpu_context(struct ipipe_domain *ipd, int cpu) +{ + return __context_of(&per_cpu(ipipe_percpu, cpu), ipd); +} + +/** + * ipipe_this_cpu_context - return the address of the pipeline context + * data for a domain on the current CPU. hw IRQs must be off. + * + * NOTE: this accessor is a bit faster, but since we don't know which + * one of "root" or "head" ipd refers to, we still need to compute the + * context address from its offset. + */ +static inline struct ipipe_percpu_domain_data * +ipipe_this_cpu_context(struct ipipe_domain *ipd) +{ + return __context_of(__ipipe_raw_cpu_ptr(&ipipe_percpu), ipd); +} + +/** + * ipipe_this_cpu_root_context - return the address of the pipeline + * context data for the root domain on the current CPU. hw IRQs must + * be off. + * + * NOTE: this accessor is recommended when the domain we refer to is + * known at build time to be the root one. + */ +static inline struct ipipe_percpu_domain_data * +ipipe_this_cpu_root_context(void) +{ + return __ipipe_raw_cpu_ptr(&ipipe_percpu.root); +} + +/** + * ipipe_this_cpu_head_context - return the address of the pipeline + * context data for the registered head domain on the current CPU. hw + * IRQs must be off. + * + * NOTE: this accessor is recommended when the domain we refer to is + * known at build time to be the registered head domain. This address + * is always different from the context data of the root domain in + * absence of registered head domain. To get the address of the + * context data for the domain leading the pipeline at the time of the + * call (which may be root in absence of registered head domain), use + * ipipe_this_cpu_leading_context() instead. 
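The irqpend_himap/irqpend_lomap fields declared just above implement the multi-level pending-IRQ log described by the mapping comments earlier in ipipe_domain.h: lomap carries one bit per IRQ and himap one summary bit per lomap word, so a pending interrupt is located with two find-first-bit operations instead of a linear scan. A stand-alone sketch of the 2-level variant (sizes and scan order are illustrative only):

#include <stdio.h>

#define BPL     (8 * (int)sizeof(unsigned long))  /* BITS_PER_LONG */
#define NR_IRQS (4 * BPL)                         /* small example space */

static unsigned long himap;                /* irqpend_himap */
static unsigned long lomap[NR_IRQS / BPL]; /* irqpend_lomap[] */

static void set_pending(unsigned int irq)
{
        lomap[irq / BPL] |= 1UL << (irq % BPL);
        himap |= 1UL << (irq / BPL);
}

static int next_pending(void)  /* -1 when nothing is logged */
{
        if (!himap)
                return -1;      /* same emptiness test as __ipipe_ipending_p() */

        int word = __builtin_ctzl(himap);
        int bit  = __builtin_ctzl(lomap[word]);
        int irq  = word * BPL + bit;

        lomap[word] &= ~(1UL << bit);
        if (!lomap[word])
                himap &= ~(1UL << word);
        return irq;
}

int main(void)
{
        set_pending(3);
        set_pending(2 * BPL + 5);
        for (int irq; (irq = next_pending()) >= 0; )
                printf("replay irq %d\n", irq);
        return 0;
}
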
+ */ +static inline struct ipipe_percpu_domain_data * +ipipe_this_cpu_head_context(void) +{ + return __ipipe_raw_cpu_ptr(&ipipe_percpu.head); +} + +/** + * ipipe_this_cpu_leading_context - return the address of the pipeline + * context data for the domain leading the pipeline on the current + * CPU. hw IRQs must be off. + * + * NOTE: this accessor is required when either root or a registered + * head domain may be the final target of this call, depending on + * whether the high priority domain was installed via + * ipipe_register_head(). + */ +static inline struct ipipe_percpu_domain_data * +ipipe_this_cpu_leading_context(void) +{ + return ipipe_this_cpu_context(ipipe_head_domain); +} + +/** + * __ipipe_get_current_context() - return the address of the pipeline + * context data of the domain running on the current CPU. hw IRQs must + * be off. + */ +static inline struct ipipe_percpu_domain_data *__ipipe_get_current_context(void) +{ + return __ipipe_raw_cpu_read(ipipe_percpu.curr); +} + +#define __ipipe_current_context __ipipe_get_current_context() + +/** + * __ipipe_set_current_context() - switch the current CPU to the + * specified domain context. hw IRQs must be off. + * + * NOTE: this is the only way to change the current domain for the + * current CPU. Don't bypass. + */ +static inline +void __ipipe_set_current_context(struct ipipe_percpu_domain_data *pd) +{ + struct ipipe_percpu_data *p; + p = __ipipe_raw_cpu_ptr(&ipipe_percpu); + p->curr = pd; +} + +/** + * __ipipe_set_current_domain() - switch the current CPU to the + * specified domain. This is equivalent to calling + * __ipipe_set_current_context() with the context data of that + * domain. hw IRQs must be off. + */ +static inline void __ipipe_set_current_domain(struct ipipe_domain *ipd) +{ + struct ipipe_percpu_data *p; + p = __ipipe_raw_cpu_ptr(&ipipe_percpu); + p->curr = __context_of(p, ipd); +} + +static inline struct ipipe_percpu_domain_data *ipipe_current_context(void) +{ + struct ipipe_percpu_domain_data *pd; + unsigned long flags; + + flags = hard_smp_local_irq_save(); + pd = __ipipe_get_current_context(); + hard_smp_local_irq_restore(flags); + + return pd; +} + +static inline struct ipipe_domain *__ipipe_get_current_domain(void) +{ + return __ipipe_get_current_context()->domain; +} + +#define __ipipe_current_domain __ipipe_get_current_domain() + +/** + * __ipipe_get_current_domain() - return the address of the pipeline + * domain running on the current CPU. hw IRQs must be off. + */ +static inline struct ipipe_domain *ipipe_get_current_domain(void) +{ + struct ipipe_domain *ipd; + unsigned long flags; + + flags = hard_smp_local_irq_save(); + ipd = __ipipe_get_current_domain(); + hard_smp_local_irq_restore(flags); + + return ipd; +} + +#define ipipe_current_domain ipipe_get_current_domain() + +#define __ipipe_root_p (__ipipe_current_domain == ipipe_root_domain) +#define ipipe_root_p (ipipe_current_domain == ipipe_root_domain) + +#ifdef CONFIG_SMP +#define __ipipe_root_status (ipipe_this_cpu_root_context()->status) +#else +extern unsigned long __ipipe_root_status; +#endif + +#define __ipipe_head_status (ipipe_this_cpu_head_context()->status) + +/** + * __ipipe_ipending_p() - Whether we have interrupts pending + * (i.e. logged) for the given domain context on the current CPU. hw + * IRQs must be off. 
+ */ +static inline int __ipipe_ipending_p(struct ipipe_percpu_domain_data *pd) +{ + return pd->irqpend_himap != 0; +} + +static inline unsigned long +__ipipe_cpudata_irq_hits(struct ipipe_domain *ipd, int cpu, unsigned int irq) +{ + return ipipe_percpu_context(ipd, cpu)->irqall[irq]; +} + +#endif /* CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_DOMAIN_H */ diff -ruN linux-org/include/linux/ipipe.h linux/include/linux/ipipe.h --- linux-org/include/linux/ipipe.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/linux/ipipe.h 2022-03-25 10:15:23.353003010 +0100 @@ -0,0 +1,487 @@ +/* -*- linux-c -*- + * include/linux/ipipe.h + * + * Copyright (C) 2002-2014 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_H +#define __LINUX_IPIPE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct cpuidle_device; +struct cpuidle_state; + +#ifdef CONFIG_IPIPE + +#include + +/* ipipe_set_hooks(..., enables) */ +#define IPIPE_SYSCALL __IPIPE_SYSCALL_E +#define IPIPE_TRAP __IPIPE_TRAP_E +#define IPIPE_KEVENT __IPIPE_KEVENT_E + +struct ipipe_sysinfo { + int sys_nr_cpus; /* Number of CPUs on board */ + int sys_hrtimer_irq; /* hrtimer device IRQ */ + u64 sys_hrtimer_freq; /* hrtimer device frequency */ + u64 sys_hrclock_freq; /* hrclock device frequency */ + u64 sys_cpu_freq; /* CPU frequency (Hz) */ + struct ipipe_arch_sysinfo arch; +}; + +struct ipipe_work_header { + size_t size; + void (*handler)(struct ipipe_work_header *work); +}; + +extern unsigned int __ipipe_printk_virq; + +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq); + +void __ipipe_complete_domain_migration(void); + +int __ipipe_switch_tail(void); + +void __ipipe_share_current(int flags); + +void __ipipe_arch_share_current(int flags); + +int __ipipe_migrate_head(void); + +void __ipipe_reenter_root(void); + +int __ipipe_disable_ondemand_mappings(struct task_struct *p); + +int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma); + +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + +#define prepare_arch_switch(next) \ + do { \ + hard_local_irq_enable(); \ + __ipipe_report_schedule(current, next); \ + } while(0) + +#ifndef ipipe_get_active_mm +static inline struct mm_struct *ipipe_get_active_mm(void) +{ + return __this_cpu_read(ipipe_percpu.active_mm); +} +#define ipipe_get_active_mm ipipe_get_active_mm +#endif + +#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + +#define prepare_arch_switch(next) \ + do { \ + __ipipe_report_schedule(current, next); \ + hard_local_irq_disable(); \ + } while(0) + +#ifndef ipipe_get_active_mm +#define ipipe_get_active_mm() (current->active_mm) +#endif + +#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + +#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE + +extern unsigned long long 
__ipipe_cs_freq; + +extern struct clocksource *__ipipe_cs; + +#endif /* CONFIG_IPIPE_WANT_CLOCKSOURCE */ + +static inline bool __ipipe_hrclock_ok(void) +{ + return __ipipe_hrclock_freq != 0; +} + +static inline void __ipipe_nmi_enter(void) +{ + __this_cpu_write(ipipe_percpu.nmi_state, __ipipe_root_status); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); + ipipe_save_context_nmi(); +} + +static inline void __ipipe_nmi_exit(void) +{ + ipipe_restore_context_nmi(); + if (!test_bit(IPIPE_STALL_FLAG, raw_cpu_ptr(&ipipe_percpu.nmi_state))) + __clear_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); +} + +/* KVM-side calls, hw IRQs off. */ +static inline void __ipipe_enter_vm(struct ipipe_vm_notifier *vmf) +{ + struct ipipe_percpu_data *p; + + p = raw_cpu_ptr(&ipipe_percpu); + p->vm_notifier = vmf; + barrier(); +} + +static inline void __ipipe_exit_vm(void) +{ + struct ipipe_percpu_data *p; + + p = raw_cpu_ptr(&ipipe_percpu); + p->vm_notifier = NULL; + barrier(); +} + +/* Client-side call, hw IRQs off. */ +void __ipipe_notify_vm_preemption(void); + +static inline void __ipipe_sync_pipeline(struct ipipe_domain *top) +{ + if (__ipipe_current_domain != top) { + __ipipe_do_sync_pipeline(top); + return; + } + if (!test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(top)->status)) + __ipipe_sync_stage(); +} + +void ipipe_register_head(struct ipipe_domain *ipd, + const char *name); + +void ipipe_unregister_head(struct ipipe_domain *ipd); + +int ipipe_request_irq(struct ipipe_domain *ipd, + unsigned int irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t ackfn); + +void ipipe_free_irq(struct ipipe_domain *ipd, + unsigned int irq); + +void ipipe_raise_irq(unsigned int irq); + +int ipipe_handle_syscall(struct thread_info *ti, + unsigned long nr, struct pt_regs *regs); + +void ipipe_set_hooks(struct ipipe_domain *ipd, + int enables); + +unsigned int ipipe_alloc_virq(void); + +void ipipe_free_virq(unsigned int virq); + +static inline void ipipe_post_irq_head(unsigned int irq) +{ + __ipipe_set_irq_pending(ipipe_head_domain, irq); +} + +static inline void ipipe_post_irq_root(unsigned int irq) +{ + __ipipe_set_irq_pending(&ipipe_root, irq); +} + +static inline void ipipe_stall_head(void) +{ + hard_local_irq_disable(); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); +} + +static inline unsigned long ipipe_test_and_stall_head(void) +{ + hard_local_irq_disable(); + return __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); +} + +static inline unsigned long ipipe_test_head(void) +{ + unsigned long flags, ret; + + flags = hard_smp_local_irq_save(); + ret = test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); + hard_smp_local_irq_restore(flags); + + return ret; +} + +void ipipe_unstall_head(void); + +void __ipipe_restore_head(unsigned long x); + +static inline void ipipe_restore_head(unsigned long x) +{ + ipipe_check_irqoff(); + if ((x ^ test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status)) & 1) + __ipipe_restore_head(x); +} + +void __ipipe_post_work_root(struct ipipe_work_header *work); + +#define ipipe_post_work_root(p, header) \ + do { \ + void header_not_at_start(void); \ + if (offsetof(typeof(*(p)), header)) { \ + header_not_at_start(); \ + } \ + __ipipe_post_work_root(&(p)->header); \ + } while (0) + +int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); + +unsigned long ipipe_critical_enter(void (*syncfn)(void)); + +void ipipe_critical_exit(unsigned long flags); + +void ipipe_prepare_panic(void); + +#ifdef CONFIG_SMP +#ifndef ipipe_smp_p +#define ipipe_smp_p (1) +#endif +void 
ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask); +void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask); +#else /* !CONFIG_SMP */ +#define ipipe_smp_p (0) +static inline +void ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) { } +static inline void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask) { } +static inline void ipipe_disable_smp(void) { } +#endif /* CONFIG_SMP */ + +static inline void ipipe_restore_root_nosync(unsigned long x) +{ + unsigned long flags; + + flags = hard_smp_local_irq_save(); + __ipipe_restore_root_nosync(x); + hard_smp_local_irq_restore(flags); +} + +/* Must be called hw IRQs off. */ +static inline void ipipe_lock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = __ipipe_current_domain; + if (ipd == ipipe_root_domain) + __ipipe_lock_irq(irq); +} + +/* Must be called hw IRQs off. */ +static inline void ipipe_unlock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = __ipipe_current_domain; + if (ipd == ipipe_root_domain) + __ipipe_unlock_irq(irq); +} + +static inline struct ipipe_threadinfo *ipipe_current_threadinfo(void) +{ + return &current_thread_info()->ipipe_data; +} + +#define ipipe_task_threadinfo(p) (&task_thread_info(p)->ipipe_data) + +void ipipe_enable_irq(unsigned int irq); + +static inline void ipipe_disable_irq(unsigned int irq) +{ + struct irq_desc *desc; + struct irq_chip *chip; + + desc = irq_to_desc(irq); + if (desc == NULL) + return; + + chip = irq_desc_get_chip(desc); + + if (WARN_ON_ONCE(chip->irq_disable == NULL && chip->irq_mask == NULL)) + return; + + if (chip->irq_disable) + chip->irq_disable(&desc->irq_data); + else + chip->irq_mask(&desc->irq_data); +} + +static inline void ipipe_end_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) + desc->ipipe_end(desc); +} + +static inline int ipipe_chained_irq_p(struct irq_desc *desc) +{ + void __ipipe_chained_irq(struct irq_desc *desc); + + return desc->handle_irq == __ipipe_chained_irq; +} + +static inline void ipipe_handle_demuxed_irq(unsigned int cascade_irq) +{ + ipipe_trace_irq_entry(cascade_irq); + __ipipe_dispatch_irq(cascade_irq, IPIPE_IRQF_NOSYNC); + ipipe_trace_irq_exit(cascade_irq); +} + +static inline void __ipipe_init_threadflags(struct thread_info *ti) +{ + ti->ipipe_flags = 0; +} + +static inline +void ipipe_set_ti_thread_flag(struct thread_info *ti, int flag) +{ + set_bit(flag, &ti->ipipe_flags); +} + +static inline +void ipipe_clear_ti_thread_flag(struct thread_info *ti, int flag) +{ + clear_bit(flag, &ti->ipipe_flags); +} + +static inline +void ipipe_test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) +{ + test_and_clear_bit(flag, &ti->ipipe_flags); +} + +static inline +int ipipe_test_ti_thread_flag(struct thread_info *ti, int flag) +{ + return test_bit(flag, &ti->ipipe_flags); +} + +#define ipipe_set_thread_flag(flag) \ + ipipe_set_ti_thread_flag(current_thread_info(), flag) + +#define ipipe_clear_thread_flag(flag) \ + ipipe_clear_ti_thread_flag(current_thread_info(), flag) + +#define ipipe_test_and_clear_thread_flag(flag) \ + ipipe_test_and_clear_ti_thread_flag(current_thread_info(), flag) + +#define ipipe_test_thread_flag(flag) \ + ipipe_test_ti_thread_flag(current_thread_info(), flag) + +#define ipipe_enable_notifier(p) \ + ipipe_set_ti_thread_flag(task_thread_info(p), TIP_NOTIFY) + +#define ipipe_disable_notifier(p) \ + do { \ + struct thread_info *ti = task_thread_info(p); \ + ipipe_clear_ti_thread_flag(ti, TIP_NOTIFY); \ + ipipe_clear_ti_thread_flag(ti, TIP_MAYDAY); \ + } while (0) + +#define 
ipipe_notifier_enabled_p(p) \ + ipipe_test_ti_thread_flag(task_thread_info(p), TIP_NOTIFY) + +#define ipipe_raise_mayday(p) \ + do { \ + struct thread_info *ti = task_thread_info(p); \ + ipipe_check_irqoff(); \ + if (ipipe_test_ti_thread_flag(ti, TIP_NOTIFY)) \ + ipipe_set_ti_thread_flag(ti, TIP_MAYDAY); \ + } while (0) + +#ifdef CONFIG_IPIPE_TRACE +void __ipipe_tracer_hrclock_initialized(void); +#else /* !CONFIG_IPIPE_TRACE */ +#define __ipipe_tracer_hrclock_initialized() do { } while(0) +#endif /* !CONFIG_IPIPE_TRACE */ + +int ipipe_get_domain_slope_hook(struct task_struct *prev, + struct task_struct *next); + +bool __ipipe_enter_cpuidle(void); + +bool ipipe_enter_cpuidle(struct cpuidle_device *dev, + struct cpuidle_state *state); + +void ipipe_exit_cpuidle(void); + +#else /* !CONFIG_IPIPE */ + +#define __ipipe_root_p 1 +#define ipipe_root_p 1 + +static inline void __ipipe_init_threadflags(struct thread_info *ti) { } + +static inline void __ipipe_complete_domain_migration(void) { } + +static inline int __ipipe_switch_tail(void) +{ + return 0; +} + +static inline void __ipipe_nmi_enter(void) { } + +static inline void __ipipe_nmi_exit(void) { } + +#define ipipe_safe_current() current +#define ipipe_processor_id() smp_processor_id() + +static inline int ipipe_test_foreign_stack(void) +{ + return 0; +} + +static inline void ipipe_lock_irq(unsigned int irq) { } + +static inline void ipipe_unlock_irq(unsigned int irq) { } + +static inline +int ipipe_handle_syscall(struct thread_info *ti, + unsigned long nr, struct pt_regs *regs) +{ + return 0; +} + +static inline +int ipipe_get_domain_slope_hook(struct task_struct *prev, + struct task_struct *next) +{ + return 0; +} + +static inline bool __ipipe_enter_cpuidle(void) +{ + return true; +} + +static inline +bool ipipe_enter_cpuidle(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + return true; +} + +static inline void ipipe_exit_cpuidle(void) { } + +#endif /* !CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_H */ diff -ruN linux-org/include/linux/ipipe_lock.h linux/include/linux/ipipe_lock.h --- linux-org/include/linux/ipipe_lock.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/linux/ipipe_lock.h 2022-03-25 10:15:23.357002995 +0100 @@ -0,0 +1,327 @@ +/* -*- linux-c -*- + * include/linux/ipipe_lock.h + * + * Copyright (C) 2009 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __LINUX_IPIPE_LOCK_H +#define __LINUX_IPIPE_LOCK_H + +typedef struct { + arch_spinlock_t arch_lock; +} __ipipe_spinlock_t; + +#define ipipe_spinlock(lock) ((__ipipe_spinlock_t *)(lock)) +#define ipipe_spinlock_p(lock) \ + __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) || \ + __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t []) + +#define std_spinlock_raw(lock) ((raw_spinlock_t *)(lock)) +#define std_spinlock_raw_p(lock) \ + __builtin_types_compatible_p(typeof(lock), raw_spinlock_t *) || \ + __builtin_types_compatible_p(typeof(lock), raw_spinlock_t []) + +#ifdef CONFIG_PREEMPT_RT_FULL + +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ + do { \ + if (ipipe_spinlock_p(lock)) \ + (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ + else if (std_spinlock_raw_p(lock)) \ + __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ + else __bad_lock_type(); \ + } while (0) + +#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ + ({ \ + int __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ + else __bad_lock_type(); \ + __ret__; \ + }) + +#define PICK_SPINTRYLOCK_IRQ(lock) \ + ({ \ + int __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ + else __bad_lock_type(); \ + __ret__; \ + }) + +#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ + do { \ + if (ipipe_spinlock_p(lock)) \ + __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ + else if (std_spinlock_raw_p(lock)) { \ + __ipipe_spin_unlock_debug(flags); \ + __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ + } else __bad_lock_type(); \ + } while (0) + +#define PICK_SPINOP(op, lock) \ + ({ \ + if (ipipe_spinlock_p(lock)) \ + arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ + else if (std_spinlock_raw_p(lock)) \ + __real_raw_spin##op(std_spinlock_raw(lock)); \ + else __bad_lock_type(); \ + (void)0; \ + }) + +#define PICK_SPINOP_RET(op, lock, type) \ + ({ \ + type __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ + else { __ret__ = -1; __bad_lock_type(); } \ + __ret__; \ + }) + +#else /* !CONFIG_PREEMPT_RT_FULL */ + +#define std_spinlock(lock) ((spinlock_t *)(lock)) +#define std_spinlock_p(lock) \ + __builtin_types_compatible_p(typeof(lock), spinlock_t *) || \ + __builtin_types_compatible_p(typeof(lock), spinlock_t []) + +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ + do { \ + if (ipipe_spinlock_p(lock)) \ + (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ + else if (std_spinlock_raw_p(lock)) \ + __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ + else if (std_spinlock_p(lock)) \ + __real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \ + else __bad_lock_type(); \ + } while (0) + +#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ + ({ \ + int __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ + else if (std_spinlock_p(lock)) \ + __ret__ = 
__real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \ + else __bad_lock_type(); \ + __ret__; \ + }) + +#define PICK_SPINTRYLOCK_IRQ(lock) \ + ({ \ + int __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ + else if (std_spinlock_p(lock)) \ + __ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \ + else __bad_lock_type(); \ + __ret__; \ + }) + +#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ + do { \ + if (ipipe_spinlock_p(lock)) \ + __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ + else { \ + __ipipe_spin_unlock_debug(flags); \ + if (std_spinlock_raw_p(lock)) \ + __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ + else if (std_spinlock_p(lock)) \ + __real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \ + } \ + } while (0) + +#define PICK_SPINOP(op, lock) \ + ({ \ + if (ipipe_spinlock_p(lock)) \ + arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ + else if (std_spinlock_raw_p(lock)) \ + __real_raw_spin##op(std_spinlock_raw(lock)); \ + else if (std_spinlock_p(lock)) \ + __real_raw_spin##op(&std_spinlock(lock)->rlock); \ + else __bad_lock_type(); \ + (void)0; \ + }) + +#define PICK_SPINOP_RET(op, lock, type) \ + ({ \ + type __ret__; \ + if (ipipe_spinlock_p(lock)) \ + __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ + else if (std_spinlock_raw_p(lock)) \ + __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ + else if (std_spinlock_p(lock)) \ + __ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \ + else { __ret__ = -1; __bad_lock_type(); } \ + __ret__; \ + }) + +#endif /* !CONFIG_PREEMPT_RT_FULL */ + +#define arch_spin_lock_init(lock) \ + do { \ + IPIPE_DEFINE_SPINLOCK(__lock__); \ + *((ipipe_spinlock_t *)lock) = __lock__; \ + } while (0) + +#define arch_spin_lock_irq(lock) \ + do { \ + hard_local_irq_disable(); \ + arch_spin_lock(lock); \ + } while (0) + +#define arch_spin_unlock_irq(lock) \ + do { \ + arch_spin_unlock(lock); \ + hard_local_irq_enable(); \ + } while (0) + +typedef struct { + arch_rwlock_t arch_lock; +} __ipipe_rwlock_t; + +#define ipipe_rwlock_p(lock) \ + __builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *) + +#define std_rwlock_p(lock) \ + __builtin_types_compatible_p(typeof(lock), rwlock_t *) + +#define ipipe_rwlock(lock) ((__ipipe_rwlock_t *)(lock)) +#define std_rwlock(lock) ((rwlock_t *)(lock)) + +#define PICK_RWOP(op, lock) \ + do { \ + if (ipipe_rwlock_p(lock)) \ + arch##op(&ipipe_rwlock(lock)->arch_lock); \ + else if (std_rwlock_p(lock)) \ + _raw##op(std_rwlock(lock)); \ + else __bad_lock_type(); \ + } while (0) + +extern int __bad_lock_type(void); + +#ifdef CONFIG_IPIPE + +#define ipipe_spinlock_t __ipipe_spinlock_t +#define IPIPE_DEFINE_RAW_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED +#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern ipipe_spinlock_t x +#define IPIPE_DEFINE_SPINLOCK(x) IPIPE_DEFINE_RAW_SPINLOCK(x) +#define IPIPE_DECLARE_SPINLOCK(x) IPIPE_DECLARE_RAW_SPINLOCK(x) + +#define IPIPE_SPIN_LOCK_UNLOCKED \ + (__ipipe_spinlock_t) { .arch_lock = __ARCH_SPIN_LOCK_UNLOCKED } + +#define spin_lock_irqsave_cond(lock, flags) \ + spin_lock_irqsave(lock, flags) + +#define spin_unlock_irqrestore_cond(lock, flags) \ + spin_unlock_irqrestore(lock, flags) + +#define raw_spin_lock_irqsave_cond(lock, flags) \ + raw_spin_lock_irqsave(lock, flags) + +#define raw_spin_unlock_irqrestore_cond(lock, 
flags) \ + raw_spin_unlock_irqrestore(lock, flags) + +void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock); + +int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock); + +void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock); + +unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock); + +int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, + unsigned long *x); + +void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, + unsigned long x); + +void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); + +void __ipipe_spin_unlock_irqcomplete(unsigned long x); + +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) +void __ipipe_spin_unlock_debug(unsigned long flags); +#else +#define __ipipe_spin_unlock_debug(flags) do { } while (0) +#endif + +#define ipipe_rwlock_t __ipipe_rwlock_t +#define IPIPE_DEFINE_RWLOCK(x) ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED +#define IPIPE_DECLARE_RWLOCK(x) extern ipipe_rwlock_t x + +#define IPIPE_RW_LOCK_UNLOCKED \ + (__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED } + +#else /* !CONFIG_IPIPE */ + +#define ipipe_spinlock_t spinlock_t +#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) +#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x +#define IPIPE_SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(unknown) +#define IPIPE_DEFINE_RAW_SPINLOCK(x) DEFINE_RAW_SPINLOCK(x) +#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern raw_spinlock_t x + +#define spin_lock_irqsave_cond(lock, flags) \ + do { \ + (void)(flags); \ + spin_lock(lock); \ + } while(0) + +#define spin_unlock_irqrestore_cond(lock, flags) \ + spin_unlock(lock) + +#define raw_spin_lock_irqsave_cond(lock, flags) \ + do { \ + (void)(flags); \ + raw_spin_lock(lock); \ + } while(0) + +#define raw_spin_unlock_irqrestore_cond(lock, flags) \ + raw_spin_unlock(lock) + +#define __ipipe_spin_lock_irq(lock) do { } while (0) +#define __ipipe_spin_unlock_irq(lock) do { } while (0) +#define __ipipe_spin_lock_irqsave(lock) 0 +#define __ipipe_spin_trylock_irq(lock) 1 +#define __ipipe_spin_trylock_irqsave(lock, x) ({ (void)(x); 1; }) +#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while (0) +#define __ipipe_spin_unlock_irqbegin(lock) spin_unlock(lock) +#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0) +#define __ipipe_spin_unlock_debug(flags) do { } while (0) + +#define ipipe_rwlock_t rwlock_t +#define IPIPE_DEFINE_RWLOCK(x) DEFINE_RWLOCK(x) +#define IPIPE_DECLARE_RWLOCK(x) extern rwlock_t x +#define IPIPE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED + +#endif /* !CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_LOCK_H */ diff -ruN linux-org/include/linux/ipipe_tickdev.h linux/include/linux/ipipe_tickdev.h --- linux-org/include/linux/ipipe_tickdev.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/linux/ipipe_tickdev.h 2022-03-25 10:15:23.357002995 +0100 @@ -0,0 +1,159 @@ +/* -*- linux-c -*- + * include/linux/ipipe_tickdev.h + * + * Copyright (C) 2007 Philippe Gerum. + * Copyright (C) 2012 Gilles Chanteperdrix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_TICKDEV_H +#define __LINUX_IPIPE_TICKDEV_H + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IPIPE + +struct clock_event_device; + +struct ipipe_hostrt_data { + short live; + seqcount_t seqcount; + time_t wall_time_sec; + u32 wall_time_nsec; + struct timespec wall_to_monotonic; + u64 cycle_last; + u64 mask; + u32 mult; + u32 shift; +}; + +enum clock_event_mode { + CLOCK_EVT_MODE_PERIODIC, + CLOCK_EVT_MODE_ONESHOT, + CLOCK_EVT_MODE_UNUSED, + CLOCK_EVT_MODE_SHUTDOWN, +}; + +struct ipipe_timer { + int irq; + void (*request)(struct ipipe_timer *timer, int steal); + int (*set)(unsigned long ticks, void *timer); + void (*ack)(void); + void (*release)(struct ipipe_timer *timer); + + /* Only if registering a timer directly */ + const char *name; + unsigned rating; + unsigned long freq; + unsigned min_delay_ticks; + const struct cpumask *cpumask; + + /* For internal use */ + void *timer_set; /* pointer passed to ->set() callback */ + struct clock_event_device *host_timer; + struct list_head link; + + /* Conversions between clock frequency and timer frequency */ + unsigned c2t_integ; + unsigned c2t_frac; + + /* For clockevent interception */ + u32 real_mult; + u32 real_shift; + void (*mode_handler)(enum clock_event_mode mode, + struct clock_event_device *); + int orig_mode; + int (*orig_set_state_periodic)(struct clock_event_device *); + int (*orig_set_state_oneshot)(struct clock_event_device *); + int (*orig_set_state_oneshot_stopped)(struct clock_event_device *); + int (*orig_set_state_shutdown)(struct clock_event_device *); + int (*orig_set_next_event)(unsigned long evt, + struct clock_event_device *cdev); + unsigned int (*refresh_freq)(void); +}; + +#define __ipipe_hrtimer_irq __ipipe_raw_cpu_read(ipipe_percpu.hrtimer_irq) + +extern unsigned long __ipipe_hrtimer_freq; + +/* + * Called by clockevents_register_device, to register a piggybacked + * ipipe timer, if there is one + */ +void ipipe_host_timer_register(struct clock_event_device *clkevt); + +/* + * Register a standalone ipipe timer + */ +void ipipe_timer_register(struct ipipe_timer *timer); + +/* + * Chooses the best timer for each cpu. Take over its handling. + */ +int ipipe_select_timers(const struct cpumask *mask); + +/* + * Release the per-cpu timers + */ +void ipipe_timers_release(void); + +/* + * Start handling the per-cpu timer irq, and intercepting the linux clockevent + * device callbacks. 
+ */ +int ipipe_timer_start(void (*tick_handler)(void), + void (*emumode)(enum clock_event_mode mode, + struct clock_event_device *cdev), + int (*emutick)(unsigned long evt, + struct clock_event_device *cdev), + unsigned cpu); + +/* + * Stop handling a per-cpu timer + */ +void ipipe_timer_stop(unsigned cpu); + +/* + * Program the timer + */ +void ipipe_timer_set(unsigned long delay); + +const char *ipipe_timer_name(void); + +unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns); + +void __ipipe_timer_refresh_freq(unsigned int hrclock_freq); + +#else /* !CONFIG_IPIPE */ + +#define ipipe_host_timer_register(clkevt) do { } while (0) + +#endif /* !CONFIG_IPIPE */ + +#ifdef CONFIG_IPIPE_HAVE_HOSTRT +void ipipe_update_hostrt(struct timekeeper *tk); +#else +static inline void +ipipe_update_hostrt(struct timekeeper *tk) {} +#endif + +#endif /* __LINUX_IPIPE_TICKDEV_H */ diff -ruN linux-org/include/linux/ipipe_trace.h linux/include/linux/ipipe_trace.h --- linux-org/include/linux/ipipe_trace.h 1970-01-01 01:00:00.000000000 +0100 +++ linux/include/linux/ipipe_trace.h 2022-03-25 10:15:23.357002995 +0100 @@ -0,0 +1,83 @@ +/* -*- linux-c -*- + * include/linux/ipipe_trace.h + * + * Copyright (C) 2005 Luotao Fu. + * 2005-2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef _LINUX_IPIPE_TRACE_H +#define _LINUX_IPIPE_TRACE_H + +#ifdef CONFIG_IPIPE_TRACE + +#include + +#ifndef BROKEN_BUILTIN_RETURN_ADDRESS +#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) +#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1)) +#endif /* !BUILTIN_RETURN_ADDRESS */ + +struct pt_regs; + +void ipipe_trace_begin(unsigned long v); +void ipipe_trace_end(unsigned long v); +void ipipe_trace_freeze(unsigned long v); +void ipipe_trace_special(unsigned char special_id, unsigned long v); +void ipipe_trace_pid(pid_t pid, short prio); +void ipipe_trace_event(unsigned char id, unsigned long delay_tsc); +int ipipe_trace_max_reset(void); +int ipipe_trace_frozen_reset(void); +void ipipe_trace_irqbegin(int irq, struct pt_regs *regs); +void ipipe_trace_irqend(int irq, struct pt_regs *regs); + +#else /* !CONFIG_IPIPE_TRACE */ + +#define ipipe_trace_begin(v) do { (void)(v); } while(0) +#define ipipe_trace_end(v) do { (void)(v); } while(0) +#define ipipe_trace_freeze(v) do { (void)(v); } while(0) +#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0) +#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0) +#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0) +#define ipipe_trace_max_reset() ({ 0; }) +#define ipipe_trace_frozen_reset() ({ 0; }) +#define ipipe_trace_irqbegin(irq, regs) do { } while(0) +#define ipipe_trace_irqend(irq, regs) do { } while(0) + +#endif /* !CONFIG_IPIPE_TRACE */ + +#ifdef CONFIG_IPIPE_TRACE_PANIC +void ipipe_trace_panic_freeze(void); +void ipipe_trace_panic_dump(void); +#else +static inline void ipipe_trace_panic_freeze(void) { } +static inline void ipipe_trace_panic_dump(void) { } +#endif + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF +#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq) +#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq) +#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL) +#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL) +#else +#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0) +#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0) +#define ipipe_trace_irqsoff() do { } while(0) +#define ipipe_trace_irqson() do { } while(0) +#endif + +#endif /* !__LINUX_IPIPE_TRACE_H */ diff -ruN linux-org/include/linux/irqchip/arm-gic.h linux/include/linux/irqchip/arm-gic.h --- linux-org/include/linux/irqchip/arm-gic.h 2022-03-25 09:55:42.357451734 +0100 +++ linux/include/linux/irqchip/arm-gic.h 2022-03-25 10:15:23.357002995 +0100 @@ -65,7 +65,11 @@ #define GICD_INT_EN_CLR_X32 0xffffffff #define GICD_INT_EN_SET_SGI 0x0000ffff #define GICD_INT_EN_CLR_PPI 0xffff0000 +#ifndef CONFIG_IPIPE #define GICD_INT_DEF_PRI 0xa0 +#else +#define GICD_INT_DEF_PRI 0x10 +#endif #define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ (GICD_INT_DEF_PRI << 16) |\ (GICD_INT_DEF_PRI << 8) |\ diff -ruN linux-org/include/linux/irqdesc.h linux/include/linux/irqdesc.h --- linux-org/include/linux/irqdesc.h 2022-03-25 09:55:42.357451734 +0100 +++ linux/include/linux/irqdesc.h 2022-03-25 10:15:23.357002995 +0100 @@ -56,6 +56,10 @@ struct irq_common_data irq_common_data; struct irq_data irq_data; unsigned int __percpu *kstat_irqs; +#ifdef CONFIG_IPIPE + void (*ipipe_ack)(struct irq_desc *desc); + void (*ipipe_end)(struct irq_desc *desc); +#endif /* CONFIG_IPIPE */ irq_flow_handler_t handle_irq; #ifdef CONFIG_IRQ_PREFLOW_FASTEOI irq_preflow_handler_t preflow_handler; @@ -183,6 +187,10 @@ return desc->action != NULL; 
} +irq_flow_handler_t +__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, + int is_chained); + static inline int irq_has_action(unsigned int irq) { return irq_desc_has_action(irq_to_desc(irq)); diff -ruN linux-org/include/linux/irq.h linux/include/linux/irq.h --- linux-org/include/linux/irq.h 2022-03-25 09:55:42.357451734 +0100 +++ linux/include/linux/irq.h 2022-03-25 10:15:23.357002995 +0100 @@ -452,6 +452,11 @@ void (*irq_bus_lock)(struct irq_data *data); void (*irq_bus_sync_unlock)(struct irq_data *data); +#ifdef CONFIG_IPIPE + void (*irq_move)(struct irq_data *data); + void (*irq_hold)(struct irq_data *data); + void (*irq_release)(struct irq_data *data); +#endif /* CONFIG_IPIPE */ void (*irq_cpu_online)(struct irq_data *data); void (*irq_cpu_offline)(struct irq_data *data); @@ -491,6 +496,7 @@ * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode + * IRQCHIP_PIPELINE_SAFE: Chip can work in pipelined mode */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -500,6 +506,7 @@ IRQCHIP_SKIP_SET_WAKE = (1 << 4), IRQCHIP_ONESHOT_SAFE = (1 << 5), IRQCHIP_EOI_THREADED = (1 << 6), + IRQCHIP_PIPELINE_SAFE = (1 << 7), }; #include @@ -587,6 +594,11 @@ extern void irq_chip_mask_parent(struct irq_data *data); extern void irq_chip_unmask_parent(struct irq_data *data); extern void irq_chip_eoi_parent(struct irq_data *data); +#ifdef CONFIG_IPIPE +extern void irq_chip_hold_parent(struct irq_data *data); +extern void irq_chip_release_parent(struct irq_data *data); +#endif + extern int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force); @@ -711,7 +723,14 @@ extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, struct msi_desc *entry); -extern struct irq_data *irq_get_irq_data(unsigned int irq); + +static inline __attribute__((const)) struct irq_data * +irq_get_irq_data(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + return desc ? &desc->irq_data : NULL; +} static inline struct irq_chip *irq_get_chip(unsigned int irq) { @@ -953,7 +972,11 @@ * different flow mechanisms (level/edge) for it. */ struct irq_chip_generic { +#ifdef CONFIG_IPIPE + ipipe_spinlock_t lock; +#else raw_spinlock_t lock; +#endif void __iomem *reg_base; u32 (*reg_readl)(void __iomem *addr); void (*reg_writel)(u32 val, void __iomem *addr); @@ -1081,18 +1104,28 @@ #define IRQ_MSK(n) (u32)((n) < 32 ? 
((1 << (n)) - 1) : UINT_MAX) #ifdef CONFIG_SMP -static inline void irq_gc_lock(struct irq_chip_generic *gc) +static inline unsigned long irq_gc_lock(struct irq_chip_generic *gc) { - raw_spin_lock(&gc->lock); + unsigned long flags = 0; + raw_spin_lock_irqsave_cond(&gc->lock, flags); + return flags; } -static inline void irq_gc_unlock(struct irq_chip_generic *gc) +static inline void +irq_gc_unlock(struct irq_chip_generic *gc, unsigned long flags) { - raw_spin_unlock(&gc->lock); + raw_spin_unlock_irqrestore_cond(&gc->lock, flags); } #else -static inline void irq_gc_lock(struct irq_chip_generic *gc) { } -static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } +static inline unsigned long irq_gc_lock(struct irq_chip_generic *gc) +{ + return hard_cond_local_irq_save(); +} +static inline void +irq_gc_unlock(struct irq_chip_generic *gc, unsigned long flags) +{ + hard_cond_local_irq_restore(flags); +} #endif /* diff -ruN linux-org/include/linux/irqnr.h linux/include/linux/irqnr.h --- linux-org/include/linux/irqnr.h 2022-03-24 17:11:42.039105283 +0100 +++ linux/include/linux/irqnr.h 2022-03-25 10:15:23.357002995 +0100 @@ -6,7 +6,11 @@ extern int nr_irqs; +#if !defined(CONFIG_IPIPE) || defined(CONFIG_SPARSE_IRQ) extern struct irq_desc *irq_to_desc(unsigned int irq); +#else +#define irq_to_desc(irq) ({ ipipe_virtual_irq_p(irq) ? NULL : &irq_desc[irq]; }) +#endif unsigned int irq_get_next_irq(unsigned int offset); # define for_each_irq_desc(irq, desc) \ diff -ruN linux-org/include/linux/kernel.h linux/include/linux/kernel.h --- linux-org/include/linux/kernel.h 2022-03-25 09:55:42.361451718 +0100 +++ linux/include/linux/kernel.h 2022-03-25 10:15:23.357002995 +0100 @@ -205,9 +205,12 @@ #ifdef CONFIG_PREEMPT_VOLUNTARY extern int _cond_resched(void); -# define might_resched() _cond_resched() +# define might_resched() do { \ + ipipe_root_only(); \ + _cond_resched(); \ + } while (0) #else -# define might_resched() do { } while (0) +# define might_resched() ipipe_root_only() #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP diff -ruN linux-org/include/linux/kvm_host.h linux/include/linux/kvm_host.h --- linux-org/include/linux/kvm_host.h 2022-03-25 09:55:42.365451704 +0100 +++ linux/include/linux/kvm_host.h 2022-03-25 10:15:23.357002995 +0100 @@ -219,6 +219,9 @@ #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; #endif +#ifdef CONFIG_IPIPE + struct ipipe_vm_notifier ipipe_notifier; +#endif int cpu; int vcpu_id; int srcu_idx; diff -ruN linux-org/include/linux/preempt.h linux/include/linux/preempt.h --- linux-org/include/linux/preempt.h 2022-03-25 09:55:42.449451388 +0100 +++ linux/include/linux/preempt.h 2022-03-25 10:15:23.357002995 +0100 @@ -255,7 +255,28 @@ #endif /* CONFIG_PREEMPT_COUNT */ -#ifdef MODULE +#ifdef CONFIG_IPIPE +#define hard_preempt_disable() \ + ({ \ + unsigned long __flags__; \ + __flags__ = hard_local_irq_save(); \ + if (__ipipe_root_p) \ + preempt_disable(); \ + __flags__; \ + }) + +#define hard_preempt_enable(__flags__) \ + do { \ + if (__ipipe_root_p) { \ + preempt_enable_no_resched(); \ + hard_local_irq_restore(__flags__); \ + if (!hard_irqs_disabled_flags(__flags__)) \ + preempt_check_resched(); \ + } else \ + hard_local_irq_restore(__flags__); \ + } while (0) + +#elif defined(MODULE) /* * Modules have no business playing preemption tricks. 
*/ @@ -263,7 +284,7 @@ #undef preempt_enable_no_resched #undef preempt_enable_no_resched_notrace #undef preempt_check_resched -#endif +#endif /* !IPIPE && MODULE */ #define preempt_set_need_resched() \ do { \ diff -ruN linux-org/include/linux/printk.h linux/include/linux/printk.h --- linux-org/include/linux/printk.h 2022-03-25 09:55:42.449451388 +0100 +++ linux/include/linux/printk.h 2022-03-25 10:15:23.357002995 +0100 @@ -155,6 +155,17 @@ static inline void printk_nmi_exit(void) { } #endif /* PRINTK_NMI */ +#ifdef CONFIG_RAW_PRINTK +void raw_vprintk(const char *fmt, va_list ap); +asmlinkage __printf(1, 2) +void raw_printk(const char *fmt, ...); +#else +static inline __cold +void raw_vprintk(const char *s, va_list ap) { } +static inline __printf(1, 2) __cold +void raw_printk(const char *s, ...) { } +#endif + #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) int vprintk_emit(int facility, int level, diff -ruN linux-org/include/linux/rwlock_api_smp.h linux/include/linux/rwlock_api_smp.h --- linux-org/include/linux/rwlock_api_smp.h 2022-03-25 09:55:42.465451327 +0100 +++ linux/include/linux/rwlock_api_smp.h 2022-03-25 10:15:23.357002995 +0100 @@ -141,7 +141,9 @@ * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are * not re-enabled during lock-acquire (which the preempt-spin-ops do): */ -#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) +#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ + defined(CONFIG_DEBUG_LOCK_ALLOC) || \ + defined(CONFIG_IPIPE) static inline void __raw_read_lock(rwlock_t *lock) { diff -ruN linux-org/include/linux/rwlock.h linux/include/linux/rwlock.h --- linux-org/include/linux/rwlock.h 2022-03-25 09:55:42.465451327 +0100 +++ linux/include/linux/rwlock.h 2022-03-25 10:15:23.357002995 +0100 @@ -61,8 +61,8 @@ #define read_trylock(lock) __cond_lock(lock, _raw_read_trylock(lock)) #define write_trylock(lock) __cond_lock(lock, _raw_write_trylock(lock)) -#define write_lock(lock) _raw_write_lock(lock) -#define read_lock(lock) _raw_read_lock(lock) +#define write_lock(lock) PICK_RWOP(_write_lock, lock) +#define read_lock(lock) PICK_RWOP(_read_lock, lock) #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -96,8 +96,8 @@ #define read_lock_bh(lock) _raw_read_lock_bh(lock) #define write_lock_irq(lock) _raw_write_lock_irq(lock) #define write_lock_bh(lock) _raw_write_lock_bh(lock) -#define read_unlock(lock) _raw_read_unlock(lock) -#define write_unlock(lock) _raw_write_unlock(lock) +#define read_unlock(lock) PICK_RWOP(_read_unlock, lock) +#define write_unlock(lock) PICK_RWOP(_write_unlock, lock) #define read_unlock_irq(lock) _raw_read_unlock_irq(lock) #define write_unlock_irq(lock) _raw_write_unlock_irq(lock) diff -ruN linux-org/include/linux/sched/coredump.h linux/include/linux/sched/coredump.h --- linux-org/include/linux/sched/coredump.h 2022-03-25 09:55:42.465451327 +0100 +++ linux/include/linux/sched/coredump.h 2022-03-25 10:15:23.357002995 +0100 @@ -72,6 +72,7 @@ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK) diff -ruN linux-org/include/linux/sched.h linux/include/linux/sched.h --- linux-org/include/linux/sched.h 2022-03-25 09:55:42.465451327 +0100 +++ linux/include/linux/sched.h 2022-03-25 10:15:23.357002995 +0100 @@ -82,7 +82,9 @@ #define TASK_WAKING 0x0200 #define 
TASK_NOLOAD 0x0400 #define TASK_NEW 0x0800 -#define TASK_STATE_MAX 0x1000 +#define TASK_HARDENING 0x1000 +#define TASK_NOWAKEUP 0x2000 +#define TASK_STATE_MAX 0x4000 /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) diff -ruN linux-org/include/linux/spinlock_api_smp.h linux/include/linux/spinlock_api_smp.h --- linux-org/include/linux/spinlock_api_smp.h 2022-03-25 09:55:42.489451237 +0100 +++ linux/include/linux/spinlock_api_smp.h 2022-03-25 10:15:23.357002995 +0100 @@ -99,7 +99,9 @@ * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are * not re-enabled during lock-acquire (which the preempt-spin-ops do): */ -#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) +#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ + defined(CONFIG_DEBUG_LOCK_ALLOC) || \ + defined(CONFIG_IPIPE) static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) { @@ -113,7 +115,7 @@ * do_raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); #else do_raw_spin_lock_flags(lock, &flags); diff -ruN linux-org/include/linux/spinlock.h linux/include/linux/spinlock.h --- linux-org/include/linux/spinlock.h 2022-03-25 09:55:42.489451237 +0100 +++ linux/include/linux/spinlock.h 2022-03-25 10:15:23.357002995 +0100 @@ -90,10 +90,12 @@ # include #endif +#include + #ifdef CONFIG_DEBUG_SPINLOCK extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, struct lock_class_key *key); -# define raw_spin_lock_init(lock) \ +# define __real_raw_spin_lock_init(lock) \ do { \ static struct lock_class_key __key; \ \ @@ -101,11 +103,14 @@ } while (0) #else -# define raw_spin_lock_init(lock) \ +# define __real_raw_spin_lock_init(lock) \ do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0) #endif +#define raw_spin_lock_init(lock) PICK_SPINOP(_lock_init, lock) -#define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) +#define __real_raw_spin_is_locked(lock) \ + arch_spin_is_locked(&(lock)->raw_lock) +#define raw_spin_is_locked(lock) PICK_SPINOP_RET(_is_locked, lock, int) #ifdef CONFIG_GENERIC_LOCKBREAK #define raw_spin_is_contended(lock) ((lock)->break_lock) @@ -191,9 +196,11 @@ * various methods are defined as nops in the case they are not * required. 
*/ -#define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) +#define __real_raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) +#define raw_spin_trylock(lock) PICK_SPINOP_RET(_trylock, lock, int) -#define raw_spin_lock(lock) _raw_spin_lock(lock) +#define __real_raw_spin_lock(lock) _raw_spin_lock(lock) +#define raw_spin_lock(lock) PICK_SPINOP(_lock, lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define raw_spin_lock_nested(lock, subclass) \ @@ -217,7 +224,7 @@ #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -#define raw_spin_lock_irqsave(lock, flags) \ +#define __real_raw_spin_lock_irqsave(lock, flags) \ do { \ typecheck(unsigned long, flags); \ flags = _raw_spin_lock_irqsave(lock); \ @@ -239,7 +246,7 @@ #else -#define raw_spin_lock_irqsave(lock, flags) \ +#define __real_raw_spin_lock_irqsave(lock, flags) \ do { \ typecheck(unsigned long, flags); \ _raw_spin_lock_irqsave(lock, flags); \ @@ -250,34 +257,46 @@ #endif -#define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) +#define raw_spin_lock_irqsave(lock, flags) \ + PICK_SPINLOCK_IRQSAVE(lock, flags) + +#define __real_raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) +#define raw_spin_lock_irq(lock) PICK_SPINOP(_lock_irq, lock) #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock) -#define raw_spin_unlock(lock) _raw_spin_unlock(lock) -#define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) +#define __real_raw_spin_unlock(lock) _raw_spin_unlock(lock) +#define raw_spin_unlock(lock) PICK_SPINOP(_unlock, lock) +#define __real_raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) +#define raw_spin_unlock_irq(lock) PICK_SPINOP(_unlock_irq, lock) -#define raw_spin_unlock_irqrestore(lock, flags) \ +#define __real_raw_spin_unlock_irqrestore(lock, flags) \ do { \ typecheck(unsigned long, flags); \ _raw_spin_unlock_irqrestore(lock, flags); \ } while (0) +#define raw_spin_unlock_irqrestore(lock, flags) \ + PICK_SPINUNLOCK_IRQRESTORE(lock, flags) + #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock) #define raw_spin_trylock_bh(lock) \ __cond_lock(lock, _raw_spin_trylock_bh(lock)) -#define raw_spin_trylock_irq(lock) \ +#define __real_raw_spin_trylock_irq(lock) \ ({ \ local_irq_disable(); \ - raw_spin_trylock(lock) ? \ + __real_raw_spin_trylock(lock) ? \ 1 : ({ local_irq_enable(); 0; }); \ }) +#define raw_spin_trylock_irq(lock) PICK_SPINTRYLOCK_IRQ(lock) -#define raw_spin_trylock_irqsave(lock, flags) \ +#define __real_raw_spin_trylock_irqsave(lock, flags) \ ({ \ local_irq_save(flags); \ raw_spin_trylock(lock) ? \ 1 : ({ local_irq_restore(flags); 0; }); \ }) +#define raw_spin_trylock_irqsave(lock, flags) \ + PICK_SPINTRYLOCK_IRQSAVE(lock, flags) /** * raw_spin_can_lock - would raw_spin_trylock() succeed? 
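[Editor's note between hunks] The PICK_SPINOP()/PICK_SPINLOCK_IRQSAVE() redirections introduced by the include/linux/spinlock.h and include/linux/ipipe_lock.h hunks above rest on compile-time type dispatch: __builtin_types_compatible_p() lets a single macro route an I-pipe lock to the hard, non-virtualized primitives while a regular kernel lock keeps going through the standard _raw_spin_* entry points. Below is a minimal standalone sketch of that trick; the struct and function names are invented for illustration and are not part of the patch.

/* Sketch only: dispatch on the static type of the lock argument,
 * in the spirit of the PICK_SPINOP() family above. */
#include <stdio.h>

struct hard_lock { int dummy; };	/* stand-in for __ipipe_spinlock_t */
struct soft_lock { int dummy; };	/* stand-in for raw_spinlock_t */

static void hard_lock_acquire(struct hard_lock *l) { (void)l; puts("hard path"); }
static void soft_lock_acquire(struct soft_lock *l) { (void)l; puts("soft path"); }

#define pick_lock(lock)							\
	do {								\
		if (__builtin_types_compatible_p(typeof(lock),		\
						 struct hard_lock *))	\
			hard_lock_acquire((struct hard_lock *)(lock));	\
		else							\
			soft_lock_acquire((struct soft_lock *)(lock));	\
	} while (0)

int main(void)
{
	struct hard_lock h;
	struct soft_lock s;

	pick_lock(&h);	/* predicate is 1, hard branch kept */
	pick_lock(&s);	/* predicate is 0, soft branch kept */
	return 0;
}

Since the predicate is a constant expression, the compiler discards the dead branch, which is why the same spin_lock() call site can serve both lock flavors with no runtime penalty.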
@@ -308,24 +327,17 @@ #define spin_lock_init(_lock) \ do { \ - spinlock_check(_lock); \ - raw_spin_lock_init(&(_lock)->rlock); \ + raw_spin_lock_init(_lock); \ } while (0) -static __always_inline void spin_lock(spinlock_t *lock) -{ - raw_spin_lock(&lock->rlock); -} +#define spin_lock(lock) raw_spin_lock(lock) static __always_inline void spin_lock_bh(spinlock_t *lock) { raw_spin_lock_bh(&lock->rlock); } -static __always_inline int spin_trylock(spinlock_t *lock) -{ - return raw_spin_trylock(&lock->rlock); -} +#define spin_trylock(lock) raw_spin_trylock(lock) #define spin_lock_nested(lock, subclass) \ do { \ @@ -337,14 +349,11 @@ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ } while (0) -static __always_inline void spin_lock_irq(spinlock_t *lock) -{ - raw_spin_lock_irq(&lock->rlock); -} +#define spin_lock_irq(lock) raw_spin_lock_irq(lock) #define spin_lock_irqsave(lock, flags) \ do { \ - raw_spin_lock_irqsave(spinlock_check(lock), flags); \ + raw_spin_lock_irqsave(lock, flags); \ } while (0) #define spin_lock_irqsave_nested(lock, flags, subclass) \ @@ -352,39 +361,28 @@ raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ } while (0) -static __always_inline void spin_unlock(spinlock_t *lock) -{ - raw_spin_unlock(&lock->rlock); -} +#define spin_unlock(lock) raw_spin_unlock(lock) static __always_inline void spin_unlock_bh(spinlock_t *lock) { raw_spin_unlock_bh(&lock->rlock); } -static __always_inline void spin_unlock_irq(spinlock_t *lock) -{ - raw_spin_unlock_irq(&lock->rlock); -} +#define spin_unlock_irq(lock) raw_spin_unlock_irq(lock) -static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) -{ - raw_spin_unlock_irqrestore(&lock->rlock, flags); -} +#define spin_unlock_irqrestore(lock, flags) \ + raw_spin_unlock_irqrestore(lock, flags) static __always_inline int spin_trylock_bh(spinlock_t *lock) { return raw_spin_trylock_bh(&lock->rlock); } -static __always_inline int spin_trylock_irq(spinlock_t *lock) -{ - return raw_spin_trylock_irq(&lock->rlock); -} +#define spin_trylock_irq(lock) raw_spin_trylock_irq(lock) #define spin_trylock_irqsave(lock, flags) \ ({ \ - raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ + raw_spin_trylock_irqsave(lock, flags); \ }) static __always_inline int spin_is_locked(spinlock_t *lock) diff -ruN linux-org/include/linux/spinlock_up.h linux/include/linux/spinlock_up.h --- linux-org/include/linux/spinlock_up.h 2022-03-25 09:55:42.489451237 +0100 +++ linux/include/linux/spinlock_up.h 2022-03-25 10:15:23.357002995 +0100 @@ -56,16 +56,6 @@ lock->slock = 1; } -/* - * Read-write spinlocks. No debug version. 
- */ -#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) -#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) -#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) - #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) /* for sched/core.c and kernel_lock.c: */ @@ -75,6 +65,13 @@ # define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ +#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) + #define arch_spin_is_contended(lock) (((void)(lock), 0)) #define arch_read_can_lock(lock) (((void)(lock), 1)) diff -ruN linux-org/include/linux/timekeeper_internal.h linux/include/linux/timekeeper_internal.h --- linux-org/include/linux/timekeeper_internal.h 2022-03-25 09:55:42.497451207 +0100 +++ linux/include/linux/timekeeper_internal.h 2022-03-25 10:15:23.357002995 +0100 @@ -135,7 +135,7 @@ #elif defined(CONFIG_GENERIC_TIME_VSYSCALL_OLD) extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm, - struct clocksource *c, u32 mult, + struct clocksource *c, u32 mult, u32 shift, u64 cycle_last); extern void update_vsyscall_tz(void); diff -ruN linux-org/include/linux/timekeeping.h linux/include/linux/timekeeping.h --- linux-org/include/linux/timekeeping.h 2022-03-25 09:55:42.497451207 +0100 +++ linux/include/linux/timekeeping.h 2022-03-25 10:15:23.357002995 +0100 @@ -347,5 +347,4 @@ extern int update_persistent_clock(struct timespec now); extern int update_persistent_clock64(struct timespec64 now); - #endif diff -ruN linux-org/init/Kconfig linux/init/Kconfig --- linux-org/init/Kconfig 2022-03-25 09:55:42.645450648 +0100 +++ linux/init/Kconfig 2022-03-25 10:15:23.357002995 +0100 @@ -80,6 +80,7 @@ config LOCALVERSION string "Local version - append to kernel release" + default "-ipipe" help Append an extra string to the end of your kernel version. This will show up when you type uname, for example. @@ -1237,6 +1238,18 @@ depends on PRINTK depends on HAVE_NMI +config RAW_PRINTK + bool "Enable support for raw printk" + default n + help + This option enables a printk variant called raw_printk() for + writing all output unmodified to a raw console channel + immediately, without any header or preparation whatsoever, + usable from any context. + + Unlike early_printk() console devices, raw_printk() devices + can live past the boot sequence. 
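[Editor's note] As a hedged illustration of the raw_printk() interface declared in the include/linux/printk.h hunk above: the surrounding function and message below are made up, only the raw_printk() signature comes from the patch.

/* Illustration only: raw_printk() takes a printk-style format string and,
 * per the RAW_PRINTK help text above, may be used from any context,
 * including code running with hard IRQs off. */
static void report_stuck_timer(unsigned int irq)
{
	raw_printk("I-pipe: timer IRQ %u did not fire\n", irq);
}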
+ config BUG bool "BUG() support" if EXPERT default y diff -ruN linux-org/init/main.c linux/init/main.c --- linux-org/init/main.c 2022-03-25 09:55:42.645450648 +0100 +++ linux/init/main.c 2022-03-25 10:15:23.357002995 +0100 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -521,7 +522,7 @@ cgroup_init_early(); - local_irq_disable(); + hard_local_irq_disable(); early_boot_irqs_disabled = true; /* @@ -569,6 +570,7 @@ pidhash_init(); vfs_caches_init_early(); sort_main_extable(); + __ipipe_init_early(); trap_init(); mm_init(); @@ -616,6 +618,11 @@ softirq_init(); timekeeping_init(); time_init(); + /* + * We need to wait for the interrupt and time subsystems to be + * initialized before enabling the pipeline. + */ + __ipipe_init(); sched_clock_postinit(); printk_safe_init(); perf_event_init(); @@ -913,6 +920,7 @@ shmem_init(); driver_init(); init_irq_proc(); + __ipipe_init_proc(); do_ctors(); usermodehelper_enable(); do_initcalls(); diff -ruN linux-org/kernel/context_tracking.c linux/kernel/context_tracking.c --- linux-org/kernel/context_tracking.c 2022-03-25 09:55:42.669450558 +0100 +++ linux/kernel/context_tracking.c 2022-03-25 10:15:23.357002995 +0100 @@ -113,7 +113,7 @@ * helpers are enough to protect RCU uses inside the exception. So * just return immediately if we detect we are in an IRQ. */ - if (in_interrupt()) + if (!ipipe_root_p || in_interrupt()) return; local_irq_save(flags); @@ -169,7 +169,7 @@ { unsigned long flags; - if (in_interrupt()) + if (!ipipe_root_p || in_interrupt()) return; local_irq_save(flags); diff -ruN linux-org/kernel/debug/debug_core.c linux/kernel/debug/debug_core.c --- linux-org/kernel/debug/debug_core.c 2022-03-25 09:55:42.669450558 +0100 +++ linux/kernel/debug/debug_core.c 2022-03-25 10:15:23.357002995 +0100 @@ -119,8 +119,8 @@ */ atomic_t kgdb_active = ATOMIC_INIT(-1); EXPORT_SYMBOL_GPL(kgdb_active); -static DEFINE_RAW_SPINLOCK(dbg_master_lock); -static DEFINE_RAW_SPINLOCK(dbg_slave_lock); +static IPIPE_DEFINE_RAW_SPINLOCK(dbg_master_lock); +static IPIPE_DEFINE_RAW_SPINLOCK(dbg_slave_lock); /* * We use NR_CPUs not PERCPU, in case kgdb is used to debug early @@ -461,7 +461,9 @@ static void dbg_touch_watchdogs(void) { touch_softlockup_watchdog_sync(); +#ifndef CONFIG_IPIPE clocksource_touch_watchdog(); +#endif rcu_cpu_stall_reset(); } @@ -492,7 +494,7 @@ * Interrupts will be restored by the 'trap return' code, except when * single stepping. 
*/ - local_irq_save(flags); + flags = hard_local_irq_save(); cpu = ks->cpu; kgdb_info[cpu].debuggerinfo = regs; @@ -541,7 +543,7 @@ smp_mb__before_atomic(); atomic_dec(&slaves_in_kgdb); dbg_touch_watchdogs(); - local_irq_restore(flags); + hard_local_irq_restore(flags); return 0; } cpu_relax(); @@ -559,7 +561,7 @@ atomic_set(&kgdb_active, -1); raw_spin_unlock(&dbg_master_lock); dbg_touch_watchdogs(); - local_irq_restore(flags); + hard_local_irq_restore(flags); goto acquirelock; } @@ -676,7 +678,7 @@ atomic_set(&kgdb_active, -1); raw_spin_unlock(&dbg_master_lock); dbg_touch_watchdogs(); - local_irq_restore(flags); + hard_local_irq_restore(flags); return kgdb_info[cpu].ret_state; } @@ -795,9 +797,9 @@ if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode) return; - local_irq_save(flags); + flags = hard_local_irq_save(); gdbstub_msg_write(s, count); - local_irq_restore(flags); + hard_local_irq_restore(flags); } static struct console kgdbcons = { diff -ruN linux-org/kernel/exit.c linux/kernel/exit.c --- linux-org/kernel/exit.c 2022-03-25 09:55:42.677450528 +0100 +++ linux/kernel/exit.c 2022-03-25 10:15:23.357002995 +0100 @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -820,6 +821,7 @@ */ raw_spin_lock_irq(&tsk->pi_lock); raw_spin_unlock_irq(&tsk->pi_lock); + __ipipe_report_exit(tsk); if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", diff -ruN linux-org/kernel/fork.c linux/kernel/fork.c --- linux-org/kernel/fork.c 2022-03-25 09:55:42.677450528 +0100 +++ linux/kernel/fork.c 2022-03-25 10:15:23.357002995 +0100 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -557,6 +558,8 @@ #endif setup_thread_stack(tsk, orig); + __ipipe_init_threadflags(task_thread_info(tsk)); + __ipipe_init_threadinfo(&task_thread_info(tsk)->ipipe_data); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); set_task_stack_end_magic(tsk); @@ -924,6 +927,7 @@ exit_aio(mm); ksm_exit(mm); khugepaged_exit(mm); /* must run before exit_mmap */ + __ipipe_report_cleanup(mm); exit_mmap(mm); mm_put_huge_zero_page(mm); set_mm_exe_file(mm, NULL); @@ -1927,6 +1931,7 @@ proc_fork_connector(p); cgroup_post_fork(p); cgroup_threadgroup_change_end(current); + __ipipe_init_taskinfo(p); perf_event_fork(p); trace_task_newtask(p, clone_flags); diff -ruN linux-org/kernel/ipipe/core.c linux/kernel/ipipe/core.c --- linux-org/kernel/ipipe/core.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/kernel/ipipe/core.c 2022-03-25 10:15:23.361002980 +0100 @@ -0,0 +1,1987 @@ +/* -*- linux-c -*- + * linux/kernel/ipipe/core.c + * + * Copyright (C) 2002-2012 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-independent I-PIPE core support. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PROC_FS +#include +#include +#endif /* CONFIG_PROC_FS */ +#include +#include +#include +#include +#include + +struct ipipe_domain ipipe_root; +EXPORT_SYMBOL_GPL(ipipe_root); + +struct ipipe_domain *ipipe_head_domain = &ipipe_root; +EXPORT_SYMBOL_GPL(ipipe_head_domain); + +#ifdef CONFIG_SMP +static __initdata struct ipipe_percpu_domain_data bootup_context = { + .status = IPIPE_STALL_MASK, + .domain = &ipipe_root, +}; +#else +#define bootup_context ipipe_percpu.root +#endif /* !CONFIG_SMP */ + +DEFINE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu) = { + .root = { + .status = IPIPE_STALL_MASK, + .domain = &ipipe_root, + }, + .curr = &bootup_context, + .hrtimer_irq = -1, +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + .context_check = 1, +#endif +}; +EXPORT_PER_CPU_SYMBOL(ipipe_percpu); + +/* Up to 2k of pending work data per CPU. */ +#define WORKBUF_SIZE 2048 +static DEFINE_PER_CPU_ALIGNED(unsigned char[WORKBUF_SIZE], work_buf); +static DEFINE_PER_CPU(void *, work_tail); +static unsigned int __ipipe_work_virq; + +static void __ipipe_do_work(unsigned int virq, void *cookie); + +#ifdef CONFIG_SMP + +#define IPIPE_CRITICAL_TIMEOUT 1000000 +static cpumask_t __ipipe_cpu_sync_map; +static cpumask_t __ipipe_cpu_lock_map; +static cpumask_t __ipipe_cpu_pass_map; +static unsigned long __ipipe_critical_lock; +static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier); +static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); +static void (*__ipipe_cpu_sync) (void); + +#else /* !CONFIG_SMP */ +/* + * Create an alias to the unique root status, so that arch-dep code + * may get fast access to this percpu variable including from + * assembly. A hard-coded assumption is that root.status appears at + * offset #0 of the ipipe_percpu struct. + */ +extern unsigned long __ipipe_root_status +__attribute__((alias(__stringify(ipipe_percpu)))); +EXPORT_SYMBOL(__ipipe_root_status); + +#endif /* !CONFIG_SMP */ + +IPIPE_DEFINE_SPINLOCK(__ipipe_lock); + +static unsigned long __ipipe_virtual_irq_map; + +#ifdef CONFIG_PRINTK +unsigned int __ipipe_printk_virq; +int __ipipe_printk_bypass; +#endif /* CONFIG_PRINTK */ + +#ifdef CONFIG_PROC_FS + +struct proc_dir_entry *ipipe_proc_root; + +static int __ipipe_version_info_show(struct seq_file *p, void *data) +{ + seq_printf(p, "%d\n", IPIPE_CORE_RELEASE); + return 0; +} + +static int __ipipe_version_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_version_info_show, NULL); +} + +static const struct file_operations __ipipe_version_proc_ops = { + .open = __ipipe_version_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __ipipe_common_info_show(struct seq_file *p, void *data) +{ + struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; + char handling, lockbit, virtuality; + unsigned long ctlbits; + unsigned int irq; + + seq_printf(p, " +--- Handled\n"); + seq_printf(p, " |+-- Locked\n"); + seq_printf(p, " ||+- Virtual\n"); + seq_printf(p, " [IRQ] ||| Handler\n"); + + mutex_lock(&ipd->mutex); + + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { + ctlbits = ipd->irqs[irq].control; + /* + * There might be a hole between the last external IRQ + * and the first virtual one; skip it. + */ + if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) + continue; + + if (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) + /* Non-allocated virtual IRQ; skip it. 
*/ + continue; + + if (ctlbits & IPIPE_HANDLE_MASK) + handling = 'H'; + else + handling = '.'; + + if (ctlbits & IPIPE_LOCK_MASK) + lockbit = 'L'; + else + lockbit = '.'; + + if (ipipe_virtual_irq_p(irq)) + virtuality = 'V'; + else + virtuality = '.'; + + if (ctlbits & IPIPE_HANDLE_MASK) + seq_printf(p, " %4u: %c%c%c %pf\n", + irq, handling, lockbit, virtuality, + ipd->irqs[irq].handler); + else + seq_printf(p, " %4u: %c%c%c\n", + irq, handling, lockbit, virtuality); + } + + mutex_unlock(&ipd->mutex); + + return 0; +} + +static int __ipipe_common_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_common_info_show, PDE_DATA(inode)); +} + +static const struct file_operations __ipipe_info_proc_ops = { + .owner = THIS_MODULE, + .open = __ipipe_common_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void add_domain_proc(struct ipipe_domain *ipd) +{ + proc_create_data(ipd->name, 0444, ipipe_proc_root, + &__ipipe_info_proc_ops, ipd); +} + +void remove_domain_proc(struct ipipe_domain *ipd) +{ + remove_proc_entry(ipd->name, ipipe_proc_root); +} + +void __init __ipipe_init_proc(void) +{ + ipipe_proc_root = proc_mkdir("ipipe", NULL); + proc_create("version", 0444, ipipe_proc_root, + &__ipipe_version_proc_ops); + add_domain_proc(ipipe_root_domain); + + __ipipe_init_tracer(); +} + +#else + +static inline void add_domain_proc(struct ipipe_domain *ipd) +{ +} + +static inline void remove_domain_proc(struct ipipe_domain *ipd) +{ +} + +#endif /* CONFIG_PROC_FS */ + +static void init_stage(struct ipipe_domain *ipd) +{ + memset(&ipd->irqs, 0, sizeof(ipd->irqs)); + mutex_init(&ipd->mutex); + __ipipe_hook_critical_ipi(ipd); +} + +static inline int root_context_offset(void) +{ + void root_context_not_at_start_of_ipipe_percpu(void); + + /* ipipe_percpu.root must be found at offset #0. */ + + if (offsetof(struct ipipe_percpu_data, root)) + root_context_not_at_start_of_ipipe_percpu(); + + return 0; +} + +#ifdef CONFIG_SMP + +static inline void fixup_percpu_data(void) +{ + struct ipipe_percpu_data *p; + int cpu; + + /* + * ipipe_percpu.curr cannot be assigned statically to + * &ipipe_percpu.root, due to the dynamic nature of percpu + * data. So we make ipipe_percpu.curr refer to a temporary + * boot up context in static memory, until we can fixup all + * context pointers in this routine, after per-cpu areas have + * been eventually set up. The temporary context data is + * copied to per_cpu(ipipe_percpu, 0).root in the same move. + * + * Obviously, this code must run over the boot CPU, before SMP + * operations start. + */ + BUG_ON(smp_processor_id() || !irqs_disabled()); + + per_cpu(ipipe_percpu, 0).root = bootup_context; + + for_each_possible_cpu(cpu) { + p = &per_cpu(ipipe_percpu, cpu); + p->curr = &p->root; + } +} + +#else /* !CONFIG_SMP */ + +static inline void fixup_percpu_data(void) { } + +#endif /* CONFIG_SMP */ + +void __init __ipipe_init_early(void) +{ + struct ipipe_domain *ipd = &ipipe_root; + int cpu; + + fixup_percpu_data(); + + /* + * A lightweight registration code for the root domain. We are + * running on the boot CPU, hw interrupts are off, and + * secondary CPUs are still lost in space. + */ + ipd->name = "Linux"; + ipd->context_offset = root_context_offset(); + init_stage(ipd); + + /* + * Do the early init stuff. First we do the per-arch pipeline + * core setup, then we run the per-client setup code. At this + * point, the kernel does not provide much services yet: be + * careful. 
+ */ + __ipipe_early_core_setup(); + __ipipe_early_client_setup(); + +#ifdef CONFIG_PRINTK + __ipipe_printk_virq = ipipe_alloc_virq(); + ipd->irqs[__ipipe_printk_virq].handler = __ipipe_flush_printk; + ipd->irqs[__ipipe_printk_virq].cookie = NULL; + ipd->irqs[__ipipe_printk_virq].ackfn = NULL; + ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; +#endif /* CONFIG_PRINTK */ + + __ipipe_work_virq = ipipe_alloc_virq(); + ipd->irqs[__ipipe_work_virq].handler = __ipipe_do_work; + ipd->irqs[__ipipe_work_virq].cookie = NULL; + ipd->irqs[__ipipe_work_virq].ackfn = NULL; + ipd->irqs[__ipipe_work_virq].control = IPIPE_HANDLE_MASK; + + for_each_possible_cpu(cpu) + per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu); +} + +void __init __ipipe_init(void) +{ + /* Now we may engage the pipeline. */ + __ipipe_enable_pipeline(); + + pr_info("Interrupt pipeline (release #%d)\n", IPIPE_CORE_RELEASE); +} + +static inline void init_head_stage(struct ipipe_domain *ipd) +{ + struct ipipe_percpu_domain_data *p; + int cpu; + + /* Must be set first, used in ipipe_percpu_context(). */ + ipd->context_offset = offsetof(struct ipipe_percpu_data, head); + + for_each_online_cpu(cpu) { + p = ipipe_percpu_context(ipd, cpu); + memset(p, 0, sizeof(*p)); + p->domain = ipd; + } + + init_stage(ipd); +} + +void ipipe_register_head(struct ipipe_domain *ipd, const char *name) +{ + BUG_ON(!ipipe_root_p || ipd == &ipipe_root); + + ipd->name = name; + init_head_stage(ipd); + barrier(); + ipipe_head_domain = ipd; + add_domain_proc(ipd); + + pr_info("I-pipe: head domain %s registered.\n", name); +} +EXPORT_SYMBOL_GPL(ipipe_register_head); + +void ipipe_unregister_head(struct ipipe_domain *ipd) +{ + BUG_ON(!ipipe_root_p || ipd != ipipe_head_domain); + + ipipe_head_domain = &ipipe_root; + smp_mb(); + mutex_lock(&ipd->mutex); + remove_domain_proc(ipd); + mutex_unlock(&ipd->mutex); + + pr_info("I-pipe: head domain %s unregistered.\n", ipd->name); +} +EXPORT_SYMBOL_GPL(ipipe_unregister_head); + +void ipipe_unstall_root(void) +{ + struct ipipe_percpu_domain_data *p; + + hard_local_irq_disable(); + + /* This helps catching bad usage from assembly call sites. 
*/ + ipipe_root_only(); + + p = ipipe_this_cpu_root_context(); + + __clear_bit(IPIPE_STALL_FLAG, &p->status); + + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_stage(); + + hard_local_irq_enable(); +} +EXPORT_SYMBOL(ipipe_unstall_root); + +void ipipe_restore_root(unsigned long x) +{ + ipipe_root_only(); + + if (x) + ipipe_stall_root(); + else + ipipe_unstall_root(); +} +EXPORT_SYMBOL(ipipe_restore_root); + +void __ipipe_restore_root_nosync(unsigned long x) +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_root_context(); + + if (raw_irqs_disabled_flags(x)) { + __set_bit(IPIPE_STALL_FLAG, &p->status); + trace_hardirqs_off(); + } else { + trace_hardirqs_on(); + __clear_bit(IPIPE_STALL_FLAG, &p->status); + } +} +EXPORT_SYMBOL_GPL(__ipipe_restore_root_nosync); + +void ipipe_unstall_head(void) +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(); + + hard_local_irq_disable(); + + __clear_bit(IPIPE_STALL_FLAG, &p->status); + + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_pipeline(ipipe_head_domain); + + hard_local_irq_enable(); +} +EXPORT_SYMBOL_GPL(ipipe_unstall_head); + +void __ipipe_restore_head(unsigned long x) /* hw interrupt off */ +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(); + + if (x) { +#ifdef CONFIG_DEBUG_KERNEL + static int warned; + if (!warned && + __test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) { + /* + * Already stalled albeit ipipe_restore_head() + * should have detected it? Send a warning once. + */ + hard_local_irq_enable(); + warned = 1; + pr_warning("I-pipe: ipipe_restore_head() " + "optimization failed.\n"); + dump_stack(); + hard_local_irq_disable(); + } +#else /* !CONFIG_DEBUG_KERNEL */ + __set_bit(IPIPE_STALL_FLAG, &p->status); +#endif /* CONFIG_DEBUG_KERNEL */ + } else { + __clear_bit(IPIPE_STALL_FLAG, &p->status); + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_pipeline(ipipe_head_domain); + hard_local_irq_enable(); + } +} +EXPORT_SYMBOL_GPL(__ipipe_restore_head); + +void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock) +{ + hard_local_irq_disable(); + if (ipipe_smp_p) + arch_spin_lock(&lock->arch_lock); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); +} +EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irq); + +void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock) +{ + if (ipipe_smp_p) + arch_spin_unlock(&lock->arch_lock); + __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + hard_local_irq_enable(); +} +EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irq); + +unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock) +{ + unsigned long flags; + int s; + + flags = hard_local_irq_save(); + if (ipipe_smp_p) + arch_spin_lock(&lock->arch_lock); + s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + + return arch_mangle_irq_bits(s, flags); +} +EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irqsave); + +int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, + unsigned long *x) +{ + unsigned long flags; + int s; + + flags = hard_local_irq_save(); + if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) { + hard_local_irq_restore(flags); + return 0; + } + s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + *x = arch_mangle_irq_bits(s, flags); + + return 1; +} +EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irqsave); + +void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, + unsigned long x) +{ + if (ipipe_smp_p) + arch_spin_unlock(&lock->arch_lock); + if (!arch_demangle_irq_bits(&x)) + __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + 
hard_local_irq_restore(x); +} +EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irqrestore); + +int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock) +{ + unsigned long flags; + + flags = hard_local_irq_save(); + if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) { + hard_local_irq_restore(flags); + return 0; + } + __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + + return 1; +} +EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irq); + +void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) +{ + if (ipipe_smp_p) + arch_spin_unlock(&lock->arch_lock); +} + +void __ipipe_spin_unlock_irqcomplete(unsigned long x) +{ + if (!arch_demangle_irq_bits(&x)) + __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); + hard_local_irq_restore(x); +} + +#ifdef __IPIPE_3LEVEL_IRQMAP + +/* Must be called hw IRQs off. */ +static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, + unsigned int irq) +{ + __set_bit(irq, p->irqheld_map); + p->irqall[irq]++; +} + +/* Must be called hw IRQs off. */ +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); + int l0b, l1b; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); + l1b = irq / BITS_PER_LONG; + + if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { + __set_bit(irq, p->irqpend_lomap); + __set_bit(l1b, p->irqpend_mdmap); + __set_bit(l0b, &p->irqpend_himap); + } else + __set_bit(irq, p->irqheld_map); + + p->irqall[irq]++; +} +EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); + +/* Must be called hw IRQs off. */ +void __ipipe_lock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = ipipe_root_domain; + struct ipipe_percpu_domain_data *p; + int l0b, l1b; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + /* + * Interrupts requested by a registered head domain cannot be + * locked, since this would make no sense: interrupts are + * globally masked at CPU level when the head domain is + * stalled, so there is no way we could encounter the + * situation IRQ locks are handling. + */ + if (test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) + return; + + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); + l1b = irq / BITS_PER_LONG; + + p = ipipe_this_cpu_context(ipd); + if (__test_and_clear_bit(irq, p->irqpend_lomap)) { + __set_bit(irq, p->irqheld_map); + if (p->irqpend_lomap[l1b] == 0) { + __clear_bit(l1b, p->irqpend_mdmap); + if (p->irqpend_mdmap[l0b] == 0) + __clear_bit(l0b, &p->irqpend_himap); + } + } +} +EXPORT_SYMBOL_GPL(__ipipe_lock_irq); + +/* Must be called hw IRQs off. 
*/ +void __ipipe_unlock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = ipipe_root_domain; + struct ipipe_percpu_domain_data *p; + int l0b, l1b, cpu; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) + return; + + l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); + l1b = irq / BITS_PER_LONG; + + for_each_online_cpu(cpu) { + p = ipipe_this_cpu_root_context(); + if (test_and_clear_bit(irq, p->irqheld_map)) { + /* We need atomic ops here: */ + set_bit(irq, p->irqpend_lomap); + set_bit(l1b, p->irqpend_mdmap); + set_bit(l0b, &p->irqpend_himap); + } + } +} +EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); + +static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) +{ + int l0b, l1b, l2b; + unsigned long l0m, l1m, l2m; + unsigned int irq; + + l0m = p->irqpend_himap; + if (unlikely(l0m == 0)) + return -1; + + l0b = __ipipe_ffnz(l0m); + l1m = p->irqpend_mdmap[l0b]; + if (unlikely(l1m == 0)) + return -1; + + l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG; + l2m = p->irqpend_lomap[l1b]; + if (unlikely(l2m == 0)) + return -1; + + l2b = __ipipe_ffnz(l2m); + irq = l1b * BITS_PER_LONG + l2b; + + __clear_bit(irq, p->irqpend_lomap); + if (p->irqpend_lomap[l1b] == 0) { + __clear_bit(l1b, p->irqpend_mdmap); + if (p->irqpend_mdmap[l0b] == 0) + __clear_bit(l0b, &p->irqpend_himap); + } + + return irq; +} + +#else /* __IPIPE_2LEVEL_IRQMAP */ + +/* Must be called hw IRQs off. */ +static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, + unsigned int irq) +{ + __set_bit(irq, p->irqheld_map); + p->irqall[irq]++; +} + +/* Must be called hw IRQs off. */ +void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); + int l0b = irq / BITS_PER_LONG; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { + __set_bit(irq, p->irqpend_lomap); + __set_bit(l0b, &p->irqpend_himap); + } else + __set_bit(irq, p->irqheld_map); + + p->irqall[irq]++; +} +EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); + +/* Must be called hw IRQs off. */ +void __ipipe_lock_irq(unsigned int irq) +{ + struct ipipe_percpu_domain_data *p; + int l0b = irq / BITS_PER_LONG; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + if (test_and_set_bit(IPIPE_LOCK_FLAG, + &ipipe_root_domain->irqs[irq].control)) + return; + + p = ipipe_this_cpu_root_context(); + if (__test_and_clear_bit(irq, p->irqpend_lomap)) { + __set_bit(irq, p->irqheld_map); + if (p->irqpend_lomap[l0b] == 0) + __clear_bit(l0b, &p->irqpend_himap); + } +} +EXPORT_SYMBOL_GPL(__ipipe_lock_irq); + +/* Must be called hw IRQs off. 
*/ +void __ipipe_unlock_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = ipipe_root_domain; + struct ipipe_percpu_domain_data *p; + int l0b = irq / BITS_PER_LONG, cpu; + + IPIPE_WARN_ONCE(!hard_irqs_disabled()); + + if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) + return; + + for_each_online_cpu(cpu) { + p = ipipe_percpu_context(ipd, cpu); + if (test_and_clear_bit(irq, p->irqheld_map)) { + /* We need atomic ops here: */ + set_bit(irq, p->irqpend_lomap); + set_bit(l0b, &p->irqpend_himap); + } + } +} +EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); + +static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) +{ + unsigned long l0m, l1m; + int l0b, l1b; + + l0m = p->irqpend_himap; + if (unlikely(l0m == 0)) + return -1; + + l0b = __ipipe_ffnz(l0m); + l1m = p->irqpend_lomap[l0b]; + if (unlikely(l1m == 0)) + return -1; + + l1b = __ipipe_ffnz(l1m); + __clear_bit(l1b, &p->irqpend_lomap[l0b]); + if (p->irqpend_lomap[l0b] == 0) + __clear_bit(l0b, &p->irqpend_himap); + + return l0b * BITS_PER_LONG + l1b; +} + +#endif /* __IPIPE_2LEVEL_IRQMAP */ + +void __ipipe_do_sync_pipeline(struct ipipe_domain *top) +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_domain *ipd; + + /* We must enter over the root domain. */ + IPIPE_WARN_ONCE(__ipipe_current_domain != ipipe_root_domain); + ipd = top; +next: + p = ipipe_this_cpu_context(ipd); + if (test_bit(IPIPE_STALL_FLAG, &p->status)) + return; + + if (__ipipe_ipending_p(p)) { + if (ipd == ipipe_root_domain) + __ipipe_sync_stage(); + else { + /* Switching to head. */ + p->coflags &= ~__IPIPE_ALL_R; + __ipipe_set_current_context(p); + __ipipe_sync_stage(); + __ipipe_set_current_domain(ipipe_root_domain); + } + } + + if (ipd != ipipe_root_domain) { + ipd = ipipe_root_domain; + goto next; + } +} +EXPORT_SYMBOL_GPL(__ipipe_do_sync_pipeline); + +unsigned int ipipe_alloc_virq(void) +{ + unsigned long flags, irq = 0; + int ipos; + + raw_spin_lock_irqsave(&__ipipe_lock, flags); + + if (__ipipe_virtual_irq_map != ~0) { + ipos = ffz(__ipipe_virtual_irq_map); + set_bit(ipos, &__ipipe_virtual_irq_map); + irq = ipos + IPIPE_VIRQ_BASE; + } + + raw_spin_unlock_irqrestore(&__ipipe_lock, flags); + + return irq; +} +EXPORT_SYMBOL_GPL(ipipe_alloc_virq); + +void ipipe_free_virq(unsigned int virq) +{ + clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); + smp_mb__after_atomic(); +} +EXPORT_SYMBOL_GPL(ipipe_free_virq); + +int ipipe_request_irq(struct ipipe_domain *ipd, + unsigned int irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t ackfn) +{ + unsigned long flags; + int ret = 0; + + ipipe_root_only(); + + if (handler == NULL || + (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))) + return -EINVAL; + + raw_spin_lock_irqsave(&__ipipe_lock, flags); + + if (ipd->irqs[irq].handler) { + ret = -EBUSY; + goto out; + } + + if (ackfn == NULL) + ackfn = ipipe_root_domain->irqs[irq].ackfn; + + ipd->irqs[irq].handler = handler; + ipd->irqs[irq].cookie = cookie; + ipd->irqs[irq].ackfn = ackfn; + ipd->irqs[irq].control = IPIPE_HANDLE_MASK; + + if (irq < IPIPE_NR_ROOT_IRQS) + __ipipe_enable_irqdesc(ipd, irq); +out: + raw_spin_unlock_irqrestore(&__ipipe_lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ipipe_request_irq); + +void ipipe_free_irq(struct ipipe_domain *ipd, + unsigned int irq) +{ + unsigned long flags; + + ipipe_root_only(); + + raw_spin_lock_irqsave(&__ipipe_lock, flags); + + if (ipd->irqs[irq].handler == NULL) + goto out; + + ipd->irqs[irq].handler = NULL; + ipd->irqs[irq].cookie = NULL; + ipd->irqs[irq].ackfn = 
NULL; + ipd->irqs[irq].control = 0; + + if (irq < IPIPE_NR_ROOT_IRQS) + __ipipe_disable_irqdesc(ipd, irq); +out: + raw_spin_unlock_irqrestore(&__ipipe_lock, flags); +} +EXPORT_SYMBOL_GPL(ipipe_free_irq); + +void ipipe_set_hooks(struct ipipe_domain *ipd, int enables) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + int cpu, wait; + + if (ipd == ipipe_root_domain) { + IPIPE_WARN(enables & __IPIPE_TRAP_E); + enables &= ~__IPIPE_TRAP_E; + } else { + IPIPE_WARN(enables & __IPIPE_KEVENT_E); + enables &= ~__IPIPE_KEVENT_E; + } + + flags = ipipe_critical_enter(NULL); + + for_each_online_cpu(cpu) { + p = ipipe_percpu_context(ipd, cpu); + p->coflags &= ~__IPIPE_ALL_E; + p->coflags |= enables; + } + + wait = (enables ^ __IPIPE_ALL_E) << __IPIPE_SHIFT_R; + if (wait == 0 || !__ipipe_root_p) { + ipipe_critical_exit(flags); + return; + } + + ipipe_this_cpu_context(ipd)->coflags &= ~wait; + + ipipe_critical_exit(flags); + + /* + * In case we cleared some hooks over the root domain, we have + * to wait for any ongoing execution to finish, since our + * caller might subsequently unmap the target domain code. + * + * We synchronize with the relevant __ipipe_notify_*() + * helpers, disabling all hooks before we start waiting for + * completion on all CPUs. + */ + for_each_online_cpu(cpu) { + while (ipipe_percpu_context(ipd, cpu)->coflags & wait) + schedule_timeout_interruptible(HZ / 50); + } +} +EXPORT_SYMBOL_GPL(ipipe_set_hooks); + +int __weak ipipe_fastcall_hook(struct pt_regs *regs) +{ + return -1; /* i.e. fall back to slow path. */ +} + +int __weak ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs) +{ + return 0; +} + +int __weak ipipe_get_domain_slope_hook(struct task_struct *prev, + struct task_struct *next) +{ + /* + * A co-kernel must provide this hook, or bad things may + * happen when sections protected by fpu_kernel_begin(), + * fpu_kernel_end() pairs are preempted by co-kernel threads + * also using the FPU! + */ + return 0; +} + +void __ipipe_root_sync(void) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + + flags = hard_local_irq_save(); + + p = ipipe_this_cpu_root_context(); + if (__ipipe_ipending_p(p)) + __ipipe_sync_stage(); + + hard_local_irq_restore(flags); +} + +int __ipipe_notify_syscall(struct pt_regs *regs) +{ + struct ipipe_domain *caller_domain, *this_domain, *ipd; + struct ipipe_percpu_domain_data *p; + unsigned long flags; + int ret = 0; + + /* + * We should definitely not pipeline a syscall with IRQs off. + */ + IPIPE_WARN_ONCE(hard_irqs_disabled()); + + flags = hard_local_irq_save(); + caller_domain = this_domain = __ipipe_current_domain; + ipd = ipipe_head_domain; +next: + p = ipipe_this_cpu_context(ipd); + if (likely(p->coflags & __IPIPE_SYSCALL_E)) { + __ipipe_set_current_context(p); + p->coflags |= __IPIPE_SYSCALL_R; + hard_local_irq_restore(flags); + ret = ipipe_syscall_hook(caller_domain, regs); + flags = hard_local_irq_save(); + p->coflags &= ~__IPIPE_SYSCALL_R; + if (__ipipe_current_domain != ipd) + /* Account for domain migration. */ + this_domain = __ipipe_current_domain; + else + __ipipe_set_current_domain(this_domain); + } + + if (this_domain == ipipe_root_domain) { + if (ipd != ipipe_root_domain && ret == 0) { + ipd = ipipe_root_domain; + goto next; + } + /* + * Careful: we may have migrated from head->root, so p + * would be ipipe_this_cpu_context(head). 
+ */ + p = ipipe_this_cpu_root_context(); + if (__ipipe_ipending_p(p)) + __ipipe_sync_stage(); + } else if (ipipe_test_thread_flag(TIP_MAYDAY)) + __ipipe_call_mayday(regs); + + hard_local_irq_restore(flags); + + return ret; +} + +int __weak ipipe_trap_hook(struct ipipe_trap_data *data) +{ + return 0; +} + +int __ipipe_notify_trap(int exception, struct pt_regs *regs) +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_trap_data data; + unsigned long flags; + int ret = 0; + + flags = hard_local_irq_save(); + + /* + * We send a notification about all traps raised over a + * registered head domain only. + */ + if (__ipipe_root_p) + goto out; + + p = ipipe_this_cpu_head_context(); + if (likely(p->coflags & __IPIPE_TRAP_E)) { + p->coflags |= __IPIPE_TRAP_R; + hard_local_irq_restore(flags); + data.exception = exception; + data.regs = regs; + ret = ipipe_trap_hook(&data); + flags = hard_local_irq_save(); + p->coflags &= ~__IPIPE_TRAP_R; + } +out: + hard_local_irq_restore(flags); + + return ret; +} + +int __weak ipipe_kevent_hook(int kevent, void *data) +{ + return 0; +} + +int __ipipe_notify_kevent(int kevent, void *data) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + int ret = 0; + + ipipe_root_only(); + + flags = hard_local_irq_save(); + + p = ipipe_this_cpu_root_context(); + if (likely(p->coflags & __IPIPE_KEVENT_E)) { + p->coflags |= __IPIPE_KEVENT_R; + hard_local_irq_restore(flags); + ret = ipipe_kevent_hook(kevent, data); + flags = hard_local_irq_save(); + p->coflags &= ~__IPIPE_KEVENT_R; + } + + hard_local_irq_restore(flags); + + return ret; +} + +void __weak ipipe_migration_hook(struct task_struct *p) +{ +} + +static void complete_domain_migration(void) /* hw IRQs off */ +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_percpu_data *pd; + struct task_struct *t; + + ipipe_root_only(); + pd = raw_cpu_ptr(&ipipe_percpu); + t = pd->task_hijacked; + if (t == NULL) + return; + + pd->task_hijacked = NULL; + t->state &= ~TASK_HARDENING; + if (t->state != TASK_INTERRUPTIBLE) + /* Migration aborted (by signal). */ + return; + + ipipe_set_ti_thread_flag(task_thread_info(t), TIP_HEAD); + p = ipipe_this_cpu_head_context(); + IPIPE_WARN_ONCE(test_bit(IPIPE_STALL_FLAG, &p->status)); + /* + * hw IRQs are disabled, but the completion hook assumes the + * head domain is logically stalled: fix it up. 
+ */ + __set_bit(IPIPE_STALL_FLAG, &p->status); + ipipe_migration_hook(t); + __clear_bit(IPIPE_STALL_FLAG, &p->status); + if (__ipipe_ipending_p(p)) + __ipipe_sync_pipeline(p->domain); +} + +void __ipipe_complete_domain_migration(void) +{ + unsigned long flags; + + flags = hard_local_irq_save(); + complete_domain_migration(); + hard_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(__ipipe_complete_domain_migration); + +int __ipipe_switch_tail(void) +{ + int x; + +#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + hard_local_irq_disable(); +#endif + x = __ipipe_root_p; + if (x) + complete_domain_migration(); + +#ifndef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + if (x) +#endif + hard_local_irq_enable(); + + return !x; +} + +void __ipipe_notify_vm_preemption(void) +{ + struct ipipe_vm_notifier *vmf; + struct ipipe_percpu_data *p; + + ipipe_check_irqoff(); + p = __ipipe_raw_cpu_ptr(&ipipe_percpu); + vmf = p->vm_notifier; + if (unlikely(vmf)) + vmf->handler(vmf); +} +EXPORT_SYMBOL_GPL(__ipipe_notify_vm_preemption); + +static void dispatch_irq_head(unsigned int irq) /* hw interrupts off */ +{ + struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(), *old; + struct ipipe_domain *head = p->domain; + + if (unlikely(test_bit(IPIPE_STALL_FLAG, &p->status))) { + __ipipe_set_irq_pending(head, irq); + return; + } + + /* Switch to the head domain if not current. */ + old = __ipipe_current_context; + if (old != p) + __ipipe_set_current_context(p); + + p->irqall[irq]++; + __set_bit(IPIPE_STALL_FLAG, &p->status); + barrier(); + head->irqs[irq].handler(irq, head->irqs[irq].cookie); + __ipipe_run_irqtail(irq); + hard_local_irq_disable(); + p = ipipe_this_cpu_head_context(); + __clear_bit(IPIPE_STALL_FLAG, &p->status); + + /* Are we still running in the head domain? */ + if (likely(__ipipe_current_context == p)) { + /* Did we enter this code over the head domain? */ + if (old->domain == head) { + /* Yes, do immediate synchronization. */ + if (__ipipe_ipending_p(p)) + __ipipe_sync_stage(); + return; + } + __ipipe_set_current_context(ipipe_this_cpu_root_context()); + } + + /* + * We must be running over the root domain, synchronize + * the pipeline for high priority IRQs (slow path). + */ + __ipipe_do_sync_pipeline(head); +} + +void __ipipe_dispatch_irq(unsigned int irq, int flags) /* hw interrupts off */ +{ + struct ipipe_domain *ipd; + struct irq_desc *desc; + unsigned long control; + int chained_irq; + + /* + * Survival kit when reading this code: + * + * - we have two main situations, leading to three cases for + * handling interrupts: + * + * a) the root domain is alone, no registered head domain + * => all interrupts go through the interrupt log + * b) a head domain is registered + * => head domain IRQs go through the fast dispatcher + * => root domain IRQs go through the interrupt log + * + * - when no head domain is registered, ipipe_head_domain == + * ipipe_root_domain == &ipipe_root. + * + * - the caller tells us whether we should acknowledge this + * IRQ. Even virtual IRQs may require acknowledge on some + * platforms (e.g. arm/SMP). + * + * - the caller tells us whether we may try to run the IRQ log + * syncer. Typically, demuxed IRQs won't be synced + * immediately. + * + * - multiplex IRQs most likely have a valid acknowledge + * handler and we may not be called with IPIPE_IRQF_NOACK + * for them. The ack handler for the multiplex IRQ actually + * decodes the demuxed interrupts. 
+ */ + +#ifdef CONFIG_IPIPE_DEBUG + if (unlikely(irq >= IPIPE_NR_IRQS) || + (irq < IPIPE_NR_ROOT_IRQS && irq_to_desc(irq) == NULL)) { + pr_err("I-pipe: spurious interrupt %u\n", irq); + return; + } +#endif + /* + * CAUTION: on some archs, virtual IRQs may have acknowledge + * handlers. Multiplex IRQs should have one too. + */ + if (unlikely(irq >= IPIPE_NR_ROOT_IRQS)) { + desc = NULL; + chained_irq = 0; + } else { + desc = irq_to_desc(irq); + chained_irq = desc ? ipipe_chained_irq_p(desc) : 0; + } + if (flags & IPIPE_IRQF_NOACK) + IPIPE_WARN_ONCE(chained_irq); + else { + ipd = ipipe_head_domain; + control = ipd->irqs[irq].control; + if ((control & IPIPE_HANDLE_MASK) == 0) + ipd = ipipe_root_domain; + if (ipd->irqs[irq].ackfn) + ipd->irqs[irq].ackfn(desc); + if (chained_irq) { + if ((flags & IPIPE_IRQF_NOSYNC) == 0) + /* Run demuxed IRQ handlers. */ + goto sync; + return; + } + } + + /* + * Sticky interrupts must be handled early and separately, so + * that we always process them on the current domain. + */ + ipd = __ipipe_current_domain; + control = ipd->irqs[irq].control; + if (control & IPIPE_STICKY_MASK) + goto log; + + /* + * In case we have no registered head domain + * (i.e. ipipe_head_domain == &ipipe_root), we always go + * through the interrupt log, and leave the dispatching work + * ultimately to __ipipe_sync_pipeline(). + */ + ipd = ipipe_head_domain; + control = ipd->irqs[irq].control; + if (ipd == ipipe_root_domain) + /* + * The root domain must handle all interrupts, so + * testing the HANDLE bit would be pointless. + */ + goto log; + + if (control & IPIPE_HANDLE_MASK) { + if (unlikely(flags & IPIPE_IRQF_NOSYNC)) + __ipipe_set_irq_pending(ipd, irq); + else + dispatch_irq_head(irq); + return; + } + + ipd = ipipe_root_domain; +log: + __ipipe_set_irq_pending(ipd, irq); + + if (flags & IPIPE_IRQF_NOSYNC) + return; + + /* + * Optimize if we preempted a registered high priority head + * domain: we don't need to synchronize the pipeline unless + * there is a pending interrupt for it. + */ + if (!__ipipe_root_p && + !__ipipe_ipending_p(ipipe_this_cpu_head_context())) + return; +sync: + __ipipe_sync_pipeline(ipipe_head_domain); +} + +void ipipe_raise_irq(unsigned int irq) +{ + struct ipipe_domain *ipd = ipipe_head_domain; + unsigned long flags, control; + + flags = hard_local_irq_save(); + + /* + * Fast path: raising a virtual IRQ handled by the head + * domain. + */ + if (likely(ipipe_virtual_irq_p(irq) && ipd != ipipe_root_domain)) { + control = ipd->irqs[irq].control; + if (likely(control & IPIPE_HANDLE_MASK)) { + dispatch_irq_head(irq); + goto out; + } + } + + /* Emulate regular device IRQ receipt. */ + __ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK); +out: + hard_local_irq_restore(flags); + +} +EXPORT_SYMBOL_GPL(ipipe_raise_irq); + +static void sync_root_irqs(void) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + + flags = hard_local_irq_save(); + + p = ipipe_this_cpu_root_context(); + if (unlikely(__ipipe_ipending_p(p))) + __ipipe_sync_stage(); + + hard_local_irq_restore(flags); +} + +int ipipe_handle_syscall(struct thread_info *ti, + unsigned long nr, struct pt_regs *regs) +{ + unsigned long local_flags = READ_ONCE(ti->ipipe_flags); + int ret; + + /* + * NOTE: This is a backport from the DOVETAIL syscall + * redirector to the older pipeline implementation. + * + * == + * + * If the syscall # is out of bounds and the current IRQ stage + * is not the root one, this has to be a non-native system + * call handled by some co-kernel on the head stage. 
Hand it + * over to the head stage via the fast syscall handler. + * + * Otherwise, if the system call is out of bounds or the + * current thread is shared with a co-kernel, hand the syscall + * over to the latter through the pipeline stages. This + * allows: + * + * - the co-kernel to receive the initial - foreign - syscall + * a thread should send for enabling syscall handling by the + * co-kernel. + * + * - the co-kernel to manipulate the current execution stage + * for handling the request, which includes switching the + * current thread back to the root stage if the syscall is a + * native one, or promoting it to the head stage if handling + * the foreign syscall requires this. + * + * Native syscalls from regular (non-pipeline) threads are + * ignored by this routine, and flow down to the regular + * system call handler. + */ + + if (nr >= NR_syscalls && (local_flags & _TIP_HEAD)) { + ipipe_fastcall_hook(regs); + local_flags = READ_ONCE(ti->ipipe_flags); + if (local_flags & _TIP_HEAD) { + if (local_flags & _TIP_MAYDAY) + __ipipe_call_mayday(regs); + return 1; /* don't pass down, no tail work. */ + } else { + sync_root_irqs(); + return -1; /* don't pass down, do tail work. */ + } + } + + if ((local_flags & _TIP_NOTIFY) || nr >= NR_syscalls) { + ret =__ipipe_notify_syscall(regs); + local_flags = READ_ONCE(ti->ipipe_flags); + if (local_flags & _TIP_HEAD) + return 1; /* don't pass down, no tail work. */ + if (ret) + return -1; /* don't pass down, do tail work. */ + } + + return 0; /* pass syscall down to the host. */ +} + +#ifdef CONFIG_PREEMPT + +void preempt_schedule_irq(void); + +void __sched __ipipe_preempt_schedule_irq(void) +{ + struct ipipe_percpu_domain_data *p; + unsigned long flags; + + if (WARN_ON_ONCE(!hard_irqs_disabled())) + hard_local_irq_disable(); + + local_irq_save(flags); + hard_local_irq_enable(); + preempt_schedule_irq(); /* Ok, may reschedule now. */ + hard_local_irq_disable(); + + /* + * Flush any pending interrupt that may have been logged after + * preempt_schedule_irq() stalled the root stage before + * returning to us, and now. + */ + p = ipipe_this_cpu_root_context(); + if (unlikely(__ipipe_ipending_p(p))) { + trace_hardirqs_on(); + __clear_bit(IPIPE_STALL_FLAG, &p->status); + __ipipe_sync_stage(); + } + + __ipipe_restore_root_nosync(flags); +} + +#else /* !CONFIG_PREEMPT */ + +#define __ipipe_preempt_schedule_irq() do { } while (0) + +#endif /* !CONFIG_PREEMPT */ + +#ifdef CONFIG_TRACE_IRQFLAGS +#define root_stall_after_handler() local_irq_disable() +#else +#define root_stall_after_handler() do { } while (0) +#endif + +/* + * __ipipe_do_sync_stage() -- Flush the pending IRQs for the current + * domain (and processor). This routine flushes the interrupt log (see + * "Optimistic interrupt protection" from D. Stodolsky et al. for more + * on the deferred interrupt scheme). Every interrupt that occurred + * while the pipeline was stalled gets played. + * + * WARNING: CPU migration may occur over this routine. + */ +void __ipipe_do_sync_stage(void) +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_domain *ipd; + int irq; + + p = __ipipe_current_context; +respin: + ipd = p->domain; + + __set_bit(IPIPE_STALL_FLAG, &p->status); + smp_wmb(); + + if (ipd == ipipe_root_domain) + trace_hardirqs_off(); + + for (;;) { + irq = __ipipe_next_irq(p); + if (irq < 0) + break; + /* + * Make sure the compiler does not reorder wrongly, so + * that all updates to maps are done before the + * handler gets called. 
+ */ + barrier(); + + if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) + continue; + + if (ipd != ipipe_head_domain) + hard_local_irq_enable(); + + if (likely(ipd != ipipe_root_domain)) { + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); + __ipipe_run_irqtail(irq); + hard_local_irq_disable(); + } else if (ipipe_virtual_irq_p(irq)) { + irq_enter(); + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); + irq_exit(); + root_stall_after_handler(); + hard_local_irq_disable(); + } else { + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); + root_stall_after_handler(); + hard_local_irq_disable(); + } + + /* + * We may have migrated to a different CPU (1) upon + * return from the handler, or downgraded from the + * head domain to the root one (2), the opposite way + * is NOT allowed though. + * + * (1) reload the current per-cpu context pointer, so + * that we further pull pending interrupts from the + * proper per-cpu log. + * + * (2) check the stall bit to know whether we may + * dispatch any interrupt pending for the root domain, + * and respin the entire dispatch loop if + * so. Otherwise, immediately return to the caller, + * _without_ affecting the stall state for the root + * domain, since we do not own it at this stage. This + * case is basically reflecting what may happen in + * dispatch_irq_head() for the fast path. + */ + p = __ipipe_current_context; + if (p->domain != ipd) { + IPIPE_BUG_ON(ipd == ipipe_root_domain); + if (test_bit(IPIPE_STALL_FLAG, &p->status)) + return; + goto respin; + } + } + + if (ipd == ipipe_root_domain) + trace_hardirqs_on(); + + __clear_bit(IPIPE_STALL_FLAG, &p->status); +} + +void __ipipe_call_mayday(struct pt_regs *regs) +{ + unsigned long flags; + + ipipe_clear_thread_flag(TIP_MAYDAY); + flags = hard_local_irq_save(); + __ipipe_notify_trap(IPIPE_TRAP_MAYDAY, regs); + hard_local_irq_restore(flags); +} + +#ifdef CONFIG_SMP + +/* Always called with hw interrupts off. */ +void __ipipe_do_critical_sync(unsigned int irq, void *cookie) +{ + int cpu = ipipe_processor_id(); + + cpumask_set_cpu(cpu, &__ipipe_cpu_sync_map); + + /* + * Now we are in sync with the lock requestor running on + * another CPU. Enter a spinning wait until he releases the + * global lock. + */ + raw_spin_lock(&__ipipe_cpu_barrier); + + /* Got it. Now get out. */ + + /* Call the sync routine if any. */ + if (__ipipe_cpu_sync) + __ipipe_cpu_sync(); + + cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map); + + raw_spin_unlock(&__ipipe_cpu_barrier); + + cpumask_clear_cpu(cpu, &__ipipe_cpu_sync_map); +} +#endif /* CONFIG_SMP */ + +unsigned long ipipe_critical_enter(void (*syncfn)(void)) +{ + cpumask_t allbutself __maybe_unused, online __maybe_unused; + int cpu __maybe_unused, n __maybe_unused; + unsigned long flags, loops __maybe_unused; + + flags = hard_local_irq_save(); + + if (num_online_cpus() == 1) + return flags; + +#ifdef CONFIG_SMP + + cpu = ipipe_processor_id(); + if (!cpumask_test_and_set_cpu(cpu, &__ipipe_cpu_lock_map)) { + while (test_and_set_bit(0, &__ipipe_critical_lock)) { + n = 0; + hard_local_irq_enable(); + + do + cpu_relax(); + while (++n < cpu); + + hard_local_irq_disable(); + } +restart: + online = *cpu_online_mask; + raw_spin_lock(&__ipipe_cpu_barrier); + + __ipipe_cpu_sync = syncfn; + + cpumask_clear(&__ipipe_cpu_pass_map); + cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map); + + /* + * Send the sync IPI to all processors but the current + * one. 
+ */ + cpumask_andnot(&allbutself, &online, &__ipipe_cpu_pass_map); + ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself); + loops = IPIPE_CRITICAL_TIMEOUT; + + while (!cpumask_equal(&__ipipe_cpu_sync_map, &allbutself)) { + if (--loops > 0) { + cpu_relax(); + continue; + } + /* + * We ran into a deadlock due to a contended + * rwlock. Cancel this round and retry. + */ + __ipipe_cpu_sync = NULL; + + raw_spin_unlock(&__ipipe_cpu_barrier); + /* + * Ensure all CPUs consumed the IPI to avoid + * running __ipipe_cpu_sync prematurely. This + * usually resolves the deadlock reason too. + */ + while (!cpumask_equal(&online, &__ipipe_cpu_pass_map)) + cpu_relax(); + + goto restart; + } + } + + atomic_inc(&__ipipe_critical_count); + +#endif /* CONFIG_SMP */ + + return flags; +} +EXPORT_SYMBOL_GPL(ipipe_critical_enter); + +void ipipe_critical_exit(unsigned long flags) +{ + if (num_online_cpus() == 1) { + hard_local_irq_restore(flags); + return; + } + +#ifdef CONFIG_SMP + if (atomic_dec_and_test(&__ipipe_critical_count)) { + raw_spin_unlock(&__ipipe_cpu_barrier); + while (!cpumask_empty(&__ipipe_cpu_sync_map)) + cpu_relax(); + cpumask_clear_cpu(ipipe_processor_id(), &__ipipe_cpu_lock_map); + clear_bit(0, &__ipipe_critical_lock); + smp_mb__after_atomic(); + } +#endif /* CONFIG_SMP */ + + hard_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ipipe_critical_exit); + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + +void ipipe_root_only(void) +{ + struct ipipe_domain *this_domain; + unsigned long flags; + + flags = hard_smp_local_irq_save(); + + this_domain = __ipipe_current_domain; + if (likely(this_domain == ipipe_root_domain && + !test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) { + hard_smp_local_irq_restore(flags); + return; + } + + if (!__this_cpu_read(ipipe_percpu.context_check)) { + hard_smp_local_irq_restore(flags); + return; + } + + hard_smp_local_irq_restore(flags); + + ipipe_prepare_panic(); + ipipe_trace_panic_freeze(); + + if (this_domain != ipipe_root_domain) + pr_err("I-pipe: Detected illicit call from head domain '%s'\n" + " into a regular Linux service\n", + this_domain->name); + else + pr_err("I-pipe: Detected stalled head domain, " + "probably caused by a bug.\n" + " A critical section may have been " + "left unterminated.\n"); + dump_stack(); + ipipe_trace_panic_dump(); +} +EXPORT_SYMBOL(ipipe_root_only); + +#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ + +#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) + +int notrace __ipipe_check_percpu_access(void) +{ + struct ipipe_percpu_domain_data *p; + struct ipipe_domain *this_domain; + unsigned long flags; + int ret = 0; + + flags = hard_local_irq_save_notrace(); + + /* + * Don't use __ipipe_current_domain here, this would recurse + * indefinitely. + */ + this_domain = raw_cpu_read(ipipe_percpu.curr)->domain; + + /* + * Only the root domain may implement preemptive CPU migration + * of tasks, so anything above in the pipeline should be fine. + */ + if (this_domain != ipipe_root_domain) + goto out; + + if (raw_irqs_disabled_flags(flags)) + goto out; + + /* + * Last chance: hw interrupts were enabled on entry while + * running over the root domain, but the root stage might be + * currently stalled, in which case preemption would be + * disabled, and no migration could occur. + */ + + p = raw_cpu_ptr(&ipipe_percpu.root); + if (!preemptible()) + goto out; + /* + * Our caller may end up accessing the wrong per-cpu variable + * instance due to CPU migration; tell it to complain about + * this. 
+ */ + ret = 1; +out: + hard_local_irq_restore_notrace(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(__ipipe_check_percpu_access); + +void __ipipe_spin_unlock_debug(unsigned long flags) +{ + /* + * We catch a nasty issue where spin_unlock_irqrestore() on a + * regular kernel spinlock is about to re-enable hw interrupts + * in a section entered with hw irqs off. This is clearly the + * sign of a massive breakage coming. Usual suspect is a + * regular spinlock which was overlooked, used within a + * section which must run with hw irqs disabled. + */ + IPIPE_WARN_ONCE(!raw_irqs_disabled_flags(flags) && hard_irqs_disabled()); +} +EXPORT_SYMBOL(__ipipe_spin_unlock_debug); + +#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */ + +void ipipe_prepare_panic(void) +{ +#ifdef CONFIG_PRINTK + __ipipe_printk_bypass = 1; +#endif + ipipe_context_check_off(); +} +EXPORT_SYMBOL_GPL(ipipe_prepare_panic); + +static void __ipipe_do_work(unsigned int virq, void *cookie) +{ + struct ipipe_work_header *work; + unsigned long flags; + void *curr, *tail; + int cpu; + + /* + * Work is dispatched in enqueuing order. This interrupt + * context can't migrate to another CPU. + */ + cpu = smp_processor_id(); + curr = per_cpu(work_buf, cpu); + + for (;;) { + flags = hard_local_irq_save(); + tail = per_cpu(work_tail, cpu); + if (curr == tail) { + per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu); + hard_local_irq_restore(flags); + return; + } + work = curr; + curr += work->size; + hard_local_irq_restore(flags); + work->handler(work); + } +} + +void __ipipe_post_work_root(struct ipipe_work_header *work) +{ + unsigned long flags; + void *tail; + int cpu; + + /* + * Subtle: we want to use the head stall/unstall operators, + * not the hard_* routines to protect against races. This way, + * we ensure that a root-based caller will trigger the virq + * handling immediately when unstalling the head stage, as a + * result of calling __ipipe_sync_pipeline() under the hood. + */ + flags = ipipe_test_and_stall_head(); + cpu = ipipe_processor_id(); + tail = per_cpu(work_tail, cpu); + + if (WARN_ON_ONCE((unsigned char *)tail + work->size >= + per_cpu(work_buf, cpu) + WORKBUF_SIZE)) + goto out; + + /* Work handling is deferred, so data has to be copied. */ + memcpy(tail, work, work->size); + per_cpu(work_tail, cpu) = tail + work->size; + ipipe_post_irq_root(__ipipe_work_virq); +out: + ipipe_restore_head(flags); +} +EXPORT_SYMBOL_GPL(__ipipe_post_work_root); + +void __weak __ipipe_arch_share_current(int flags) +{ +} + +void __ipipe_share_current(int flags) +{ + ipipe_root_only(); + + __ipipe_arch_share_current(flags); +} +EXPORT_SYMBOL_GPL(__ipipe_share_current); + +bool __weak ipipe_cpuidle_control(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + /* + * Allow entering the idle state by default, matching the + * original behavior when CPU_IDLE is turned + * on. ipipe_cpuidle_control() should be overriden by the + * client domain code for determining whether the CPU may + * actually enter the idle state. + */ + return true; +} + +bool __ipipe_enter_cpuidle(void) +{ + struct ipipe_percpu_domain_data *p; + + /* + * We may go idle if no interrupt is waiting delivery from the + * root stage. + */ + hard_local_irq_disable(); + p = ipipe_this_cpu_root_context(); + + return !__ipipe_ipending_p(p); +} + +bool ipipe_enter_cpuidle(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + /* + * Pending IRQs or a co-kernel may deny the transition to + * idle. 
 */
+ return __ipipe_enter_cpuidle() && ipipe_cpuidle_control(dev, state);
+}
+
+void ipipe_exit_cpuidle(void)
+{
+ /* unstall and re-enable hw IRQs too. */
+ local_irq_enable();
+}
+
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \
+ defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT)
+void __ipipe_uaccess_might_fault(void)
+{
+ struct ipipe_percpu_domain_data *pdd;
+ struct ipipe_domain *ipd;
+ unsigned long flags;
+
+ flags = hard_local_irq_save();
+ ipd = __ipipe_current_domain;
+ if (ipd == ipipe_root_domain) {
+ hard_local_irq_restore(flags);
+ might_fault();
+ return;
+ }
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+ pdd = ipipe_this_cpu_context(ipd);
+ WARN_ON_ONCE(hard_irqs_disabled_flags(flags)
+ || test_bit(IPIPE_STALL_FLAG, &pdd->status));
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+ (void)pdd;
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+ hard_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(__ipipe_uaccess_might_fault);
+#endif
diff -ruN linux-org/kernel/ipipe/Kconfig linux/kernel/ipipe/Kconfig
--- linux-org/kernel/ipipe/Kconfig 1970-01-01 01:00:00.000000000 +0100
+++ linux/kernel/ipipe/Kconfig 2022-03-25 10:15:23.357002995 +0100
@@ -0,0 +1,44 @@
+config IPIPE
+ bool "Interrupt pipeline"
+ default y
+ ---help---
+ Activate this option if you want the interrupt pipeline to be
+ compiled in.
+
+config IPIPE_CORE
+ def_bool y if IPIPE
+
+config IPIPE_WANT_CLOCKSOURCE
+ bool
+
+config IPIPE_WANT_PTE_PINNING
+ bool
+
+config IPIPE_CORE_APIREV
+ int
+ depends on IPIPE
+ default 2
+ ---help---
+ The API revision level we implement.
+
+config IPIPE_WANT_APIREV_2
+ bool
+
+config IPIPE_TARGET_APIREV
+ int
+ depends on IPIPE
+ default IPIPE_CORE_APIREV
+ ---help---
+ The API revision level we want (must be <=
+ IPIPE_CORE_APIREV).
+
+config IPIPE_HAVE_HOSTRT
+ bool
+
+config IPIPE_HAVE_EAGER_FPU
+ bool
+
+if IPIPE && ARM && RAW_PRINTK && !DEBUG_LL
+comment "CAUTION: DEBUG_LL must be selected, and properly configured for"
+comment "RAW_PRINTK to work. Otherwise, you will get no output on raw_printk()"
+endif
diff -ruN linux-org/kernel/ipipe/Kconfig.debug linux/kernel/ipipe/Kconfig.debug
--- linux-org/kernel/ipipe/Kconfig.debug 1970-01-01 01:00:00.000000000 +0100
+++ linux/kernel/ipipe/Kconfig.debug 2022-03-25 10:15:23.357002995 +0100
@@ -0,0 +1,96 @@
+config IPIPE_DEBUG
+ bool "I-pipe debugging"
+ depends on IPIPE
+ select RAW_PRINTK
+
+config IPIPE_DEBUG_CONTEXT
+ bool "Check for illicit cross-domain calls"
+ depends on IPIPE_DEBUG
+ default y
+ ---help---
+ Enable this feature to arm checkpoints in the kernel that
+ verify the correct invocation context. On entry to critical
+ Linux services, a warning is issued if the caller is not
+ running over the root domain.
+
+config IPIPE_DEBUG_INTERNAL
+ bool "Enable internal debug checks"
+ depends on IPIPE_DEBUG
+ default y
+ ---help---
+ When this feature is enabled, I-pipe will perform internal
+ consistency checks of its subsystems, e.g. on per-cpu variable
+ access.
+
+config IPIPE_TRACE
+ bool "Latency tracing"
+ depends on IPIPE_DEBUG
+ select CONFIG_FTRACE
+ select CONFIG_FUNCTION_TRACER
+ select KALLSYMS
+ select PROC_FS
+ ---help---
+ Activate this option if you want to use per-function tracing of
+ the kernel. The tracer will collect data via instrumentation
+ features like the one below or with the help of explicit calls
+ to ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
+ in-kernel tracing API. The collected data and runtime control
+ is available via /proc/ipipe/trace/*.
+
+if IPIPE_TRACE
+
+config IPIPE_TRACE_ENABLE
+ bool "Enable tracing on boot"
+ default y
+ ---help---
+ Disable this option if you want to arm the tracer manually after
+ booting ("echo 1 > /proc/ipipe/trace/enable"). This can reduce
+ boot time on slow embedded devices due to the tracer overhead.
+
+config IPIPE_TRACE_MCOUNT
+ bool "Instrument function entries"
+ default y
+ select FTRACE
+ select FUNCTION_TRACER
+ ---help---
+ When enabled, records every kernel function entry in the tracer
+ log. While this slows down the system noticeably, it provides
+ the highest level of information about the flow of events.
+ However, it can be switched off in order to record only explicit
+ I-pipe trace points.
+
+config IPIPE_TRACE_IRQSOFF
+ bool "Trace IRQs-off times"
+ default y
+ ---help---
+ Activate this option if I-pipe should trace the longest path
+ with hard IRQs switched off.
+
+config IPIPE_TRACE_SHIFT
+ int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
+ range 10 18
+ default 14
+ ---help---
+ The number of trace points to hold tracing data for each
+ trace path, as a power of 2.
+
+config IPIPE_TRACE_VMALLOC
+ bool "Use vmalloc'ed trace buffer"
+ default y if EMBEDDED
+ ---help---
+ Instead of reserving static kernel data, the required buffer
+ is allocated via vmalloc during boot-up when this option is
+ enabled. This can help to start systems that are low on memory,
+ but it slightly degrades overall performance. Try this option
+ when a traced kernel hangs unexpectedly at boot time.
+
+config IPIPE_TRACE_PANIC
+ bool "Enable panic back traces"
+ default y
+ ---help---
+ Provides services to freeze and dump a back trace in panic
+ situations. This is used on IPIPE_DEBUG_CONTEXT exceptions
+ as well as ordinary kernel oopses. You can control the number
+ of printed back trace points via /proc/ipipe/trace.
+
+endif
diff -ruN linux-org/kernel/ipipe/Makefile linux/kernel/ipipe/Makefile
--- linux-org/kernel/ipipe/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ linux/kernel/ipipe/Makefile 2022-03-25 10:15:23.357002995 +0100
@@ -0,0 +1,2 @@
+obj-$(CONFIG_IPIPE) += core.o timer.o
+obj-$(CONFIG_IPIPE_TRACE) += tracer.o
diff -ruN linux-org/kernel/ipipe/timer.c linux/kernel/ipipe/timer.c
--- linux-org/kernel/ipipe/timer.c 1970-01-01 01:00:00.000000000 +0100
+++ linux/kernel/ipipe/timer.c 2022-03-25 10:15:23.361002980 +0100
@@ -0,0 +1,588 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/timer.c
+ *
+ * Copyright (C) 2012 Gilles Chanteperdrix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * I-pipe timer request interface.
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long __ipipe_hrtimer_freq; + +static LIST_HEAD(timers); +static IPIPE_DEFINE_SPINLOCK(lock); + +static DEFINE_PER_CPU(struct ipipe_timer *, percpu_timer); + +#ifdef CONFIG_GENERIC_CLOCKEVENTS +/* + * Default request method: switch to oneshot mode if supported. + */ +static void ipipe_timer_default_request(struct ipipe_timer *timer, int steal) +{ + struct clock_event_device *evtdev = timer->host_timer; + + if (!(evtdev->features & CLOCK_EVT_FEAT_ONESHOT)) + return; + + if (clockevent_state_oneshot(evtdev) || + clockevent_state_oneshot_stopped(evtdev)) + timer->orig_mode = CLOCK_EVT_MODE_ONESHOT; + else { + if (clockevent_state_periodic(evtdev)) + timer->orig_mode = CLOCK_EVT_MODE_PERIODIC; + else if (clockevent_state_shutdown(evtdev)) + timer->orig_mode = CLOCK_EVT_MODE_SHUTDOWN; + else + timer->orig_mode = CLOCK_EVT_MODE_UNUSED; + evtdev->set_state_oneshot(evtdev); + evtdev->set_next_event(timer->freq / HZ, evtdev); + } +} + +/* + * Default release method: return the timer to the mode it had when + * starting. + */ +static void ipipe_timer_default_release(struct ipipe_timer *timer) +{ + struct clock_event_device *evtdev = timer->host_timer; + + switch (timer->orig_mode) { + case CLOCK_EVT_MODE_SHUTDOWN: + evtdev->set_state_shutdown(evtdev); + break; + case CLOCK_EVT_MODE_PERIODIC: + evtdev->set_state_periodic(evtdev); + case CLOCK_EVT_MODE_ONESHOT: + evtdev->set_next_event(timer->freq / HZ, evtdev); + break; + } +} + +static int get_dev_mode(struct clock_event_device *evtdev) +{ + if (clockevent_state_oneshot(evtdev) || + clockevent_state_oneshot_stopped(evtdev)) + return CLOCK_EVT_MODE_ONESHOT; + + if (clockevent_state_periodic(evtdev)) + return CLOCK_EVT_MODE_PERIODIC; + + if (clockevent_state_shutdown(evtdev)) + return CLOCK_EVT_MODE_SHUTDOWN; + + return CLOCK_EVT_MODE_UNUSED; +} + +void ipipe_host_timer_register(struct clock_event_device *evtdev) +{ + struct ipipe_timer *timer = evtdev->ipipe_timer; + + if (timer == NULL) + return; + + timer->orig_mode = CLOCK_EVT_MODE_UNUSED; + + if (timer->request == NULL) + timer->request = ipipe_timer_default_request; + + /* + * By default, use the same method as linux timer, on ARM at + * least, most set_next_event methods are safe to be called + * from Xenomai domain anyway. 
+ */ + if (timer->set == NULL) { + timer->timer_set = evtdev; + timer->set = (typeof(timer->set))evtdev->set_next_event; + } + + if (timer->release == NULL) + timer->release = ipipe_timer_default_release; + + if (timer->name == NULL) + timer->name = evtdev->name; + + if (timer->rating == 0) + timer->rating = evtdev->rating; + + timer->freq = (1000000000ULL * evtdev->mult) >> evtdev->shift; + + if (timer->min_delay_ticks == 0) + timer->min_delay_ticks = + (evtdev->min_delta_ns * evtdev->mult) >> evtdev->shift; + + if (timer->cpumask == NULL) + timer->cpumask = evtdev->cpumask; + + timer->host_timer = evtdev; + + ipipe_timer_register(timer); +} +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + +/* + * register a timer: maintain them in a list sorted by rating + */ +void ipipe_timer_register(struct ipipe_timer *timer) +{ + struct ipipe_timer *t; + unsigned long flags; + + if (timer->timer_set == NULL) + timer->timer_set = timer; + + if (timer->cpumask == NULL) + timer->cpumask = cpumask_of(smp_processor_id()); + + raw_spin_lock_irqsave(&lock, flags); + + list_for_each_entry(t, &timers, link) { + if (t->rating <= timer->rating) { + __list_add(&timer->link, t->link.prev, &t->link); + goto done; + } + } + list_add_tail(&timer->link, &timers); + done: + raw_spin_unlock_irqrestore(&lock, flags); +} + +static void ipipe_timer_request_sync(void) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + struct clock_event_device *evtdev; + int steal; + + if (!timer) + return; + + evtdev = timer->host_timer; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + steal = evtdev != NULL && !clockevent_state_detached(evtdev); +#else /* !CONFIG_GENERIC_CLOCKEVENTS */ + steal = 1; +#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ + + timer->request(timer, steal); +} + +static void config_pcpu_timer(struct ipipe_timer *t, unsigned hrclock_freq) +{ + unsigned long long tmp; + unsigned hrtimer_freq; + + if (__ipipe_hrtimer_freq != t->freq) + __ipipe_hrtimer_freq = t->freq; + + hrtimer_freq = t->freq; + if (__ipipe_hrclock_freq > UINT_MAX) + hrtimer_freq /= 1000; + + t->c2t_integ = hrtimer_freq / hrclock_freq; + tmp = (((unsigned long long) + (hrtimer_freq % hrclock_freq)) << 32) + + hrclock_freq - 1; + do_div(tmp, hrclock_freq); + t->c2t_frac = tmp; +} + +/* Set up a timer as per-cpu timer for ipipe */ +static void install_pcpu_timer(unsigned cpu, unsigned hrclock_freq, + struct ipipe_timer *t) +{ + per_cpu(ipipe_percpu.hrtimer_irq, cpu) = t->irq; + per_cpu(percpu_timer, cpu) = t; + config_pcpu_timer(t, hrclock_freq); +} + +static void select_root_only_timer(unsigned cpu, unsigned hrclock_khz, + const struct cpumask *mask, + struct ipipe_timer *t) { + unsigned icpu; + struct clock_event_device *evtdev; + + /* + * If no ipipe-supported CPU shares an interrupt with the + * timer, we do not need to care about it. + */ + for_each_cpu(icpu, mask) { + if (t->irq == per_cpu(ipipe_percpu.hrtimer_irq, icpu)) { +#ifdef CONFIG_GENERIC_CLOCKEVENTS + evtdev = t->host_timer; + if (evtdev && clockevent_state_shutdown(evtdev)) + continue; +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + goto found; + } + } + + return; + +found: + install_pcpu_timer(cpu, hrclock_khz, t); +} + +/* + * Choose per-cpu timers with the highest rating by traversing the + * rating-sorted list for each CPU. 
+ */ +int ipipe_select_timers(const struct cpumask *mask) +{ + unsigned hrclock_freq; + unsigned long long tmp; + struct ipipe_timer *t; + struct clock_event_device *evtdev; + unsigned long flags; + unsigned cpu; + cpumask_t fixup; + + if (!__ipipe_hrclock_ok()) { + printk("I-pipe: high-resolution clock not working\n"); + return -ENODEV; + } + + if (__ipipe_hrclock_freq > UINT_MAX) { + tmp = __ipipe_hrclock_freq; + do_div(tmp, 1000); + hrclock_freq = tmp; + } else + hrclock_freq = __ipipe_hrclock_freq; + + raw_spin_lock_irqsave(&lock, flags); + + /* First, choose timers for the CPUs handled by ipipe */ + for_each_cpu(cpu, mask) { + list_for_each_entry(t, &timers, link) { + if (!cpumask_test_cpu(cpu, t->cpumask)) + continue; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + evtdev = t->host_timer; + if (evtdev && clockevent_state_shutdown(evtdev)) + continue; +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + goto found; + } + + printk("I-pipe: could not find timer for cpu #%d\n", + cpu); + goto err_remove_all; +found: + install_pcpu_timer(cpu, hrclock_freq, t); + } + + /* + * Second, check if we need to fix up any CPUs not supported + * by ipipe (but by Linux) whose interrupt may need to be + * forwarded because they have the same IRQ as an ipipe-enabled + * timer. + */ + cpumask_andnot(&fixup, cpu_online_mask, mask); + + for_each_cpu(cpu, &fixup) { + list_for_each_entry(t, &timers, link) { + if (!cpumask_test_cpu(cpu, t->cpumask)) + continue; + + select_root_only_timer(cpu, hrclock_freq, mask, t); + } + } + + raw_spin_unlock_irqrestore(&lock, flags); + + flags = ipipe_critical_enter(ipipe_timer_request_sync); + ipipe_timer_request_sync(); + ipipe_critical_exit(flags); + + return 0; + +err_remove_all: + raw_spin_unlock_irqrestore(&lock, flags); + + for_each_cpu(cpu, mask) { + per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1; + per_cpu(percpu_timer, cpu) = NULL; + } + __ipipe_hrtimer_freq = 0; + + return -ENODEV; +} + +static void ipipe_timer_release_sync(void) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + if (timer) + timer->release(timer); +} + +void ipipe_timers_release(void) +{ + unsigned long flags; + unsigned cpu; + + flags = ipipe_critical_enter(ipipe_timer_release_sync); + ipipe_timer_release_sync(); + ipipe_critical_exit(flags); + + for_each_online_cpu(cpu) { + per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1; + per_cpu(percpu_timer, cpu) = NULL; + __ipipe_hrtimer_freq = 0; + } +} + +static void __ipipe_ack_hrtimer_irq(struct irq_desc *desc) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + if (desc) + desc->ipipe_ack(desc); + if (timer->ack) + timer->ack(); + if (desc) + desc->ipipe_end(desc); +} + +static int do_set_oneshot(struct clock_event_device *cdev) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + timer->mode_handler(CLOCK_EVT_MODE_ONESHOT, cdev); + + return 0; +} + +static int do_set_periodic(struct clock_event_device *cdev) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + timer->mode_handler(CLOCK_EVT_MODE_PERIODIC, cdev); + + return 0; +} + +static int do_set_shutdown(struct clock_event_device *cdev) +{ + struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); + + timer->mode_handler(CLOCK_EVT_MODE_SHUTDOWN, cdev); + + return 0; +} + +int ipipe_timer_start(void (*tick_handler)(void), + void (*emumode)(enum clock_event_mode mode, + struct clock_event_device *cdev), + int (*emutick)(unsigned long evt, + struct clock_event_device *cdev), + unsigned cpu) +{ + struct clock_event_device *evtdev; + 
struct ipipe_timer *timer; + struct irq_desc *desc; + unsigned long flags; + int steal, ret; + + timer = per_cpu(percpu_timer, cpu); + evtdev = timer->host_timer; + + flags = ipipe_critical_enter(NULL); + + ret = ipipe_request_irq(ipipe_head_domain, timer->irq, + (ipipe_irq_handler_t)tick_handler, + NULL, __ipipe_ack_hrtimer_irq); + if (ret < 0 && ret != -EBUSY) { + ipipe_critical_exit(flags); + return ret; + } + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + steal = evtdev != NULL && !clockevent_state_detached(evtdev); + if (steal && evtdev->ipipe_stolen == 0) { + timer->real_mult = evtdev->mult; + timer->real_shift = evtdev->shift; + timer->orig_set_state_periodic = evtdev->set_state_periodic; + timer->orig_set_state_oneshot = evtdev->set_state_oneshot; + timer->orig_set_state_oneshot_stopped = evtdev->set_state_oneshot_stopped; + timer->orig_set_state_shutdown = evtdev->set_state_shutdown; + timer->orig_set_next_event = evtdev->set_next_event; + timer->mode_handler = emumode; + evtdev->mult = 1; + evtdev->shift = 0; + evtdev->max_delta_ns = UINT_MAX; + evtdev->set_state_periodic = do_set_periodic; + evtdev->set_state_oneshot = do_set_oneshot; + evtdev->set_state_oneshot_stopped = do_set_oneshot; + evtdev->set_state_shutdown = do_set_shutdown; + evtdev->set_next_event = emutick; + evtdev->ipipe_stolen = 1; + } + + ret = get_dev_mode(evtdev); +#else /* CONFIG_GENERIC_CLOCKEVENTS */ + steal = 1; + ret = 0; +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + + ipipe_critical_exit(flags); + + desc = irq_to_desc(timer->irq); + if (desc && irqd_irq_disabled(&desc->irq_data)) + ipipe_enable_irq(timer->irq); + + return ret; +} + +void ipipe_timer_stop(unsigned cpu) +{ + unsigned long __maybe_unused flags; + struct clock_event_device *evtdev; + struct ipipe_timer *timer; + struct irq_desc *desc; + + timer = per_cpu(percpu_timer, cpu); + evtdev = timer->host_timer; + + desc = irq_to_desc(timer->irq); + if (desc && irqd_irq_disabled(&desc->irq_data)) + ipipe_disable_irq(timer->irq); + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + if (evtdev) { + flags = ipipe_critical_enter(NULL); + + if (evtdev->ipipe_stolen) { + evtdev->mult = timer->real_mult; + evtdev->shift = timer->real_shift; + evtdev->set_state_periodic = timer->orig_set_state_periodic; + evtdev->set_state_oneshot = timer->orig_set_state_oneshot; + evtdev->set_state_oneshot_stopped = timer->orig_set_state_oneshot_stopped; + evtdev->set_state_shutdown = timer->orig_set_state_shutdown; + evtdev->set_next_event = timer->orig_set_next_event; + evtdev->ipipe_stolen = 0; + } + + ipipe_critical_exit(flags); + } +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + + ipipe_free_irq(ipipe_head_domain, timer->irq); +} + +void ipipe_timer_set(unsigned long cdelay) +{ + unsigned long tdelay; + struct ipipe_timer *t; + + t = __ipipe_raw_cpu_read(percpu_timer); + + /* + * Even though some architectures may use a 64 bits delay + * here, we voluntarily limit to 32 bits, 4 billions ticks + * should be enough for now. Would a timer needs more, an + * extra call to the tick handler would simply occur after 4 + * billions ticks. 
+ */ + if (cdelay > UINT_MAX) + cdelay = UINT_MAX; + + tdelay = cdelay; + if (t->c2t_integ != 1) + tdelay *= t->c2t_integ; + if (t->c2t_frac) + tdelay += ((unsigned long long)cdelay * t->c2t_frac) >> 32; + if (tdelay < t->min_delay_ticks) + tdelay = t->min_delay_ticks; + + if (t->set(tdelay, t->timer_set) < 0) + ipipe_raise_irq(t->irq); +} +EXPORT_SYMBOL_GPL(ipipe_timer_set); + +const char *ipipe_timer_name(void) +{ + return per_cpu(percpu_timer, 0)->name; +} +EXPORT_SYMBOL_GPL(ipipe_timer_name); + +unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns) +{ + unsigned long long tmp; + BUG_ON(!timer->freq); + tmp = (unsigned long long)ns * timer->freq; + do_div(tmp, 1000000000); + return tmp; +} + +#ifdef CONFIG_IPIPE_HAVE_HOSTRT +/* + * NOTE: The architecture specific code must only call this function + * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled. + * The event receiver is responsible for providing proper locking. + */ +void ipipe_update_hostrt(struct timekeeper *tk) +{ + struct tk_read_base *tkr = &tk->tkr_mono; + struct clocksource *clock = tkr->clock; + struct ipipe_hostrt_data data; + struct timespec xt; + + xt.tv_sec = tk->xtime_sec; + xt.tv_nsec = (long)(tkr->xtime_nsec >> tkr->shift); + ipipe_root_only(); + data.live = 1; + data.cycle_last = tkr->cycle_last; + data.mask = clock->mask; + data.mult = tkr->mult; + data.shift = tkr->shift; + data.wall_time_sec = xt.tv_sec; + data.wall_time_nsec = xt.tv_nsec; + data.wall_to_monotonic.tv_sec = tk->wall_to_monotonic.tv_sec; + data.wall_to_monotonic.tv_nsec = tk->wall_to_monotonic.tv_nsec; + __ipipe_notify_kevent(IPIPE_KEVT_HOSTRT, &data); +} + +#endif /* CONFIG_IPIPE_HAVE_HOSTRT */ + +int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, + bool force); + +void __ipipe_timer_refresh_freq(unsigned int hrclock_freq) +{ + struct ipipe_timer *t = __ipipe_raw_cpu_read(percpu_timer); + unsigned long flags; + + if (t && t->refresh_freq) { + t->freq = t->refresh_freq(); + flags = hard_local_irq_save(); + config_pcpu_timer(t, hrclock_freq); + hard_local_irq_restore(flags); + clockevents_program_event(t->host_timer, + t->host_timer->next_event, false); + } +} diff -ruN linux-org/kernel/ipipe/tracer.c linux/kernel/ipipe/tracer.c --- linux-org/kernel/ipipe/tracer.c 1970-01-01 01:00:00.000000000 +0100 +++ linux/kernel/ipipe/tracer.c 2022-03-25 10:15:23.361002980 +0100 @@ -0,0 +1,1486 @@ +/* -*- linux-c -*- + * kernel/ipipe/tracer.c + * + * Copyright (C) 2005 Luotao Fu. + * 2005-2008 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */ +#define IPIPE_DEFAULT_ACTIVE 0 +#define IPIPE_DEFAULT_MAX 1 +#define IPIPE_DEFAULT_FROZEN 2 + +#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) +#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) + +#define IPIPE_DEFAULT_PRE_TRACE 10 +#define IPIPE_DEFAULT_POST_TRACE 10 +#define IPIPE_DEFAULT_BACK_TRACE 100 + +#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ +#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ + +#define IPIPE_TFLG_NMI_LOCK 0x0001 +#define IPIPE_TFLG_NMI_HIT 0x0002 +#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 + +#define IPIPE_TFLG_HWIRQ_OFF 0x0100 +#define IPIPE_TFLG_FREEZING 0x0200 +#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ +#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 +#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */ +#define IPIPE_TFLG_DOMSTATE_BITS 1 + +#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ + (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) +#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ + ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) + +struct ipipe_trace_point { + short type; + short flags; + unsigned long eip; + unsigned long parent_eip; + unsigned long v; + unsigned long long timestamp; +}; + +struct ipipe_trace_path { + volatile int flags; + int dump_lock; /* separated from flags due to cross-cpu access */ + int trace_pos; /* next point to fill */ + int begin, end; /* finalised path begin and end */ + int post_trace; /* non-zero when in post-trace phase */ + unsigned long long length; /* max path length in cycles */ + unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ + unsigned long nmi_saved_parent_eip; + unsigned long nmi_saved_v; + struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; +} ____cacheline_aligned_in_smp; + +enum ipipe_trace_type +{ + IPIPE_TRACE_FUNC = 0, + IPIPE_TRACE_BEGIN, + IPIPE_TRACE_END, + IPIPE_TRACE_FREEZE, + IPIPE_TRACE_SPECIAL, + IPIPE_TRACE_PID, + IPIPE_TRACE_EVENT, +}; + +#define IPIPE_TYPE_MASK 0x0007 +#define IPIPE_TYPE_BITS 3 + +#ifdef CONFIG_IPIPE_TRACE_VMALLOC +static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path); +#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ +static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) = + { [0 ... 
IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } }; +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + +int ipipe_trace_enable = 0; + +static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE }; +static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX }; +static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN }; +static IPIPE_DEFINE_SPINLOCK(global_path_lock); +static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; +static int post_trace = IPIPE_DEFAULT_POST_TRACE; +static int back_trace = IPIPE_DEFAULT_BACK_TRACE; +static int verbose_trace = 1; +static unsigned long trace_overhead; + +static unsigned long trigger_begin; +static unsigned long trigger_end; + +static DEFINE_MUTEX(out_mutex); +static struct ipipe_trace_path *print_path; +#ifdef CONFIG_IPIPE_TRACE_PANIC +static struct ipipe_trace_path *panic_path; +#endif /* CONFIG_IPIPE_TRACE_PANIC */ +static int print_pre_trace; +static int print_post_trace; + + +static long __ipipe_signed_tsc2us(long long tsc); +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); + +static inline void store_states(struct ipipe_domain *ipd, + struct ipipe_trace_point *point, int pos) +{ + if (test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(ipd)->status)) + point->flags |= 1 << (pos + IPIPE_TFLG_DOMSTATE_SHIFT); + + if (ipd == __ipipe_current_domain) + point->flags |= pos << IPIPE_TFLG_CURRDOM_SHIFT; +} + +static notrace void +__ipipe_store_domain_states(struct ipipe_trace_point *point) +{ + store_states(ipipe_root_domain, point, 0); + if (ipipe_head_domain != ipipe_root_domain) + store_states(ipipe_head_domain, point, 1); +} + +static notrace int __ipipe_get_free_trace_path(int old, int cpu) +{ + int new_active = old; + struct ipipe_trace_path *tp; + + do { + if (++new_active == IPIPE_TRACE_PATHS) + new_active = 0; + tp = &per_cpu(trace_path, cpu)[new_active]; + } while (new_active == per_cpu(max_path, cpu) || + new_active == per_cpu(frozen_path, cpu) || + tp->dump_lock); + + return new_active; +} + +static notrace void +__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, + struct ipipe_trace_path *old_tp, int old_pos) +{ + int i; + + new_tp->trace_pos = pre_trace+1; + + for (i = new_tp->trace_pos; i > 0; i--) + memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], + &old_tp->point[WRAP_POINT_NO(old_pos-i)], + sizeof(struct ipipe_trace_point)); + + /* mark the end (i.e. the point before point[0]) invalid */ + new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; +} + +static notrace struct ipipe_trace_path * +__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos) +{ + struct ipipe_trace_path *old_tp = tp; + long active = per_cpu(active_path, cpu); + unsigned long long length; + + /* do we have a new worst case? 
*/ + length = tp->point[tp->end].timestamp - + tp->point[tp->begin].timestamp; + if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) { + /* we need protection here against other cpus trying + to start a proc dump */ + raw_spin_lock(&global_path_lock); + + /* active path holds new worst case */ + tp->length = length; + per_cpu(max_path, cpu) = active; + + /* find next unused trace path */ + active = __ipipe_get_free_trace_path(active, cpu); + + raw_spin_unlock(&global_path_lock); + + tp = &per_cpu(trace_path, cpu)[active]; + + /* migrate last entries for pre-tracing */ + __ipipe_migrate_pre_trace(tp, old_tp, pos); + } + + return tp; +} + +static notrace struct ipipe_trace_path * +__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos) +{ + struct ipipe_trace_path *old_tp = tp; + long active = per_cpu(active_path, cpu); + int n; + + /* frozen paths have no core (begin=end) */ + tp->begin = tp->end; + + /* we need protection here against other cpus trying + * to set their frozen path or to start a proc dump */ + raw_spin_lock(&global_path_lock); + + per_cpu(frozen_path, cpu) = active; + + /* find next unused trace path */ + active = __ipipe_get_free_trace_path(active, cpu); + + /* check if this is the first frozen path */ + for_each_possible_cpu(n) { + if (n != cpu && + per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0) + tp->end = -1; + } + + raw_spin_unlock(&global_path_lock); + + tp = &per_cpu(trace_path, cpu)[active]; + + /* migrate last entries for pre-tracing */ + __ipipe_migrate_pre_trace(tp, old_tp, pos); + + return tp; +} + +void notrace +__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, + unsigned long parent_eip, unsigned long v) +{ + struct ipipe_trace_path *tp, *old_tp; + int pos, next_pos, begin; + struct ipipe_trace_point *point; + unsigned long flags; + int cpu; + + flags = hard_local_irq_save_notrace(); + + cpu = ipipe_processor_id(); + restart: + tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; + + /* here starts a race window with NMIs - catched below */ + + /* check for NMI recursion */ + if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { + tp->flags |= IPIPE_TFLG_NMI_HIT; + + /* first freeze request from NMI context? */ + if ((type == IPIPE_TRACE_FREEZE) && + !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { + /* save arguments and mark deferred freezing */ + tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; + tp->nmi_saved_eip = eip; + tp->nmi_saved_parent_eip = parent_eip; + tp->nmi_saved_v = v; + } + return; /* no need for restoring flags inside IRQ */ + } + + /* clear NMI events and set lock (atomically per cpu) */ + tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | + IPIPE_TFLG_NMI_FREEZE_REQ)) + | IPIPE_TFLG_NMI_LOCK; + + /* check active_path again - some nasty NMI may have switched + * it meanwhile */ + if (unlikely(tp != + &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) { + /* release lock on wrong path and restart */ + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* there is no chance that the NMI got deferred + * => no need to check for pending freeze requests */ + goto restart; + } + + /* get the point buffer */ + pos = tp->trace_pos; + point = &tp->point[pos]; + + /* store all trace point data */ + point->type = type; + point->flags = hard_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF : 0; + point->eip = eip; + point->parent_eip = parent_eip; + point->v = v; + ipipe_read_tsc(point->timestamp); + + __ipipe_store_domain_states(point); + + /* forward to next point buffer */ + next_pos = WRAP_POINT_NO(pos+1); + tp->trace_pos = next_pos; + + /* only mark beginning if we haven't started yet */ + begin = tp->begin; + if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) + tp->begin = pos; + + /* end of critical path, start post-trace if not already started */ + if (unlikely(type == IPIPE_TRACE_END) && + (begin >= 0) && !tp->post_trace) + tp->post_trace = post_trace + 1; + + /* freeze only if the slot is free and we are not already freezing */ + if ((unlikely(type == IPIPE_TRACE_FREEZE) || + (unlikely(eip >= trigger_begin && eip <= trigger_end) && + type == IPIPE_TRACE_FUNC)) && + per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 && + !(tp->flags & IPIPE_TFLG_FREEZING)) { + tp->post_trace = post_trace + 1; + tp->flags |= IPIPE_TFLG_FREEZING; + } + + /* enforce end of trace in case of overflow */ + if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { + tp->end = pos; + goto enforce_end; + } + + /* stop tracing this path if we are in post-trace and + * a) that phase is over now or + * b) a new TRACE_BEGIN came in but we are not freezing this path */ + if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || + ((type == IPIPE_TRACE_BEGIN) && + !(tp->flags & IPIPE_TFLG_FREEZING))))) { + /* store the path's end (i.e. excluding post-trace) */ + tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); + + enforce_end: + if (tp->flags & IPIPE_TFLG_FREEZING) + tp = __ipipe_trace_freeze(cpu, tp, pos); + else + tp = __ipipe_trace_end(cpu, tp, pos); + + /* reset the active path, maybe already start a new one */ + tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
+ WRAP_POINT_NO(tp->trace_pos - 1) : -1; + tp->end = -1; + tp->post_trace = 0; + tp->flags = 0; + + /* update active_path not earlier to avoid races with NMIs */ + per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu); + } + + /* we still have old_tp and point, + * let's reset NMI lock and check for catches */ + old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { + /* well, this late tagging may not immediately be visible for + * other cpus already dumping this path - a minor issue */ + point->flags |= IPIPE_TFLG_NMI_HIT; + + /* handle deferred freezing from NMI context */ + if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) + __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, + old_tp->nmi_saved_parent_eip, + old_tp->nmi_saved_v); + } + + hard_local_irq_restore_notrace(flags); +} + +static unsigned long __ipipe_global_path_lock(void) +{ + unsigned long flags; + int cpu; + struct ipipe_trace_path *tp; + + raw_spin_lock_irqsave(&global_path_lock, flags); + + cpu = ipipe_processor_id(); + restart: + tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; + + /* here is small race window with NMIs - catched below */ + + /* clear NMI events and set lock (atomically per cpu) */ + tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | + IPIPE_TFLG_NMI_FREEZE_REQ)) + | IPIPE_TFLG_NMI_LOCK; + + /* check active_path again - some nasty NMI may have switched + * it meanwhile */ + if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) { + /* release lock on wrong path and restart */ + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* there is no chance that the NMI got deferred + * => no need to check for pending freeze requests */ + goto restart; + } + + return flags; +} + +static void __ipipe_global_path_unlock(unsigned long flags) +{ + int cpu; + struct ipipe_trace_path *tp; + + /* release spinlock first - it's not involved in the NMI issue */ + __ipipe_spin_unlock_irqbegin(&global_path_lock); + + cpu = ipipe_processor_id(); + tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; + + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* handle deferred freezing from NMI context */ + if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) + __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, + tp->nmi_saved_parent_eip, tp->nmi_saved_v); + + /* See __ipipe_spin_lock_irqsave() and friends. 
*/ + __ipipe_spin_unlock_irqcomplete(flags); +} + +void notrace asmlinkage +ipipe_trace_asm(enum ipipe_trace_type type, unsigned long eip, + unsigned long parent_eip, unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(type, eip, parent_eip, v); +} + +void notrace ipipe_trace_begin(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL_GPL(ipipe_trace_begin); + +void notrace ipipe_trace_end(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL_GPL(ipipe_trace_end); + +void notrace ipipe_trace_irqbegin(int irq, struct pt_regs *regs) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_BEGIN, instruction_pointer(regs), + __BUILTIN_RETURN_ADDRESS1, irq); +} +EXPORT_SYMBOL_GPL(ipipe_trace_irqbegin); + +void notrace ipipe_trace_irqend(int irq, struct pt_regs *regs) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_END, instruction_pointer(regs), + __BUILTIN_RETURN_ADDRESS1, irq); +} +EXPORT_SYMBOL_GPL(ipipe_trace_irqend); + +void notrace ipipe_trace_freeze(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL_GPL(ipipe_trace_freeze); + +void notrace ipipe_trace_special(unsigned char id, unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), + __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL_GPL(ipipe_trace_special); + +void notrace ipipe_trace_pid(pid_t pid, short prio) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), + __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, pid); +} +EXPORT_SYMBOL_GPL(ipipe_trace_pid); + +void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS), + __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, delay_tsc); +} +EXPORT_SYMBOL_GPL(ipipe_trace_event); + +int ipipe_trace_max_reset(void) +{ + int cpu; + unsigned long flags; + struct ipipe_trace_path *path; + int ret = 0; + + flags = __ipipe_global_path_lock(); + + for_each_possible_cpu(cpu) { + path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; + + if (path->dump_lock) { + ret = -EBUSY; + break; + } + + path->begin = -1; + path->end = -1; + path->trace_pos = 0; + path->length = 0; + } + + __ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ipipe_trace_max_reset); + +int ipipe_trace_frozen_reset(void) +{ + int cpu; + unsigned long flags; + struct ipipe_trace_path *path; + int ret = 0; + + flags = __ipipe_global_path_lock(); + + for_each_online_cpu(cpu) { + path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; + + if (path->dump_lock) { + ret = -EBUSY; + break; + } + + path->begin = -1; + path->end = -1; + path->trace_pos = 0; + path->length = 0; + } + + __ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ipipe_trace_frozen_reset); + +static void +__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, + int trylock) +{ + struct task_struct *task = NULL; + char buf[8]; + int i; + int locked = 1; + + if (trylock) { + if (!read_trylock(&tasklist_lock)) + locked = 0; + } else + 
read_lock(&tasklist_lock); + + if (locked) + task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns); + + if (task) + strncpy(task_info, task->comm, 11); + else + strcpy(task_info, "--"); + + if (locked) + read_unlock(&tasklist_lock); + + for (i = strlen(task_info); i < 11; i++) + task_info[i] = ' '; + + sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); + strcpy(task_info + (11 - strlen(buf)), buf); +} + +static void +__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path, + struct ipipe_trace_point *point) +{ + long time; + int type; + + time = __ipipe_signed_tsc2us(point->timestamp - + path->point[path->begin].timestamp + point->v); + type = point->type >> IPIPE_TYPE_BITS; + + if (type == 0) + /* + * Event type #0 is predefined, stands for the next + * timer tick. + */ + sprintf(buf, "tick@%-6ld", time); + else + sprintf(buf, "%3d@%-7ld", type, time); +} + +#ifdef CONFIG_IPIPE_TRACE_PANIC + +void ipipe_trace_panic_freeze(void) +{ + unsigned long flags; + int cpu; + + if (!ipipe_trace_enable) + return; + + ipipe_trace_enable = 0; + flags = hard_local_irq_save_notrace(); + + cpu = ipipe_processor_id(); + + panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; + + hard_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(ipipe_trace_panic_freeze); + +void ipipe_trace_panic_dump(void) +{ + int cnt = back_trace; + int start, pos; + char buf[16]; + + if (!panic_path) + return; + + ipipe_context_check_off(); + + printk("I-pipe tracer log (%d points):\n", cnt); + + start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); + + while (cnt-- > 0) { + struct ipipe_trace_point *point = &panic_path->point[pos]; + long time; + char info[16]; + int i; + + printk(" %c", + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) + printk("%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '*' : ' ')); + + if (!point->eip) + printk("--\n"); + else { + __ipipe_trace_point_type(buf, point); + printk("%s", buf); + + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + printk(" "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(info, + point, 1); + printk("%s", info); + break; + + case IPIPE_TRACE_EVENT: + __ipipe_get_event_date(info, + panic_path, point); + printk("%s", info); + break; + + default: + printk("0x%08lx ", point->v); + } + + time = __ipipe_signed_tsc2us(point->timestamp - + panic_path->point[start].timestamp); + printk(" %5ld ", time); + + __ipipe_print_symname(NULL, point->eip); + printk(" ("); + __ipipe_print_symname(NULL, point->parent_eip); + printk(")\n"); + } + pos = WRAP_POINT_NO(pos - 1); + } + + panic_path = NULL; +} +EXPORT_SYMBOL_GPL(ipipe_trace_panic_dump); + +#endif /* CONFIG_IPIPE_TRACE_PANIC */ + + +/* --- /proc output --- */ + +static notrace int __ipipe_in_critical_trpath(long point_no) +{ + return ((WRAP_POINT_NO(point_no-print_path->begin) < + WRAP_POINT_NO(print_path->end-print_path->begin)) || + ((print_path->end == print_path->begin) && + (WRAP_POINT_NO(point_no-print_path->end) > + print_post_trace))); +} + +static long __ipipe_signed_tsc2us(long long tsc) +{ + unsigned long long abs_tsc; + long us; + + if (!__ipipe_hrclock_ok()) + return 0; + + /* ipipe_tsc2us works on unsigned => handle sign separately */ + abs_tsc = (tsc >= 0) ? 
tsc : -tsc; + us = ipipe_tsc2us(abs_tsc); + if (tsc < 0) + return -us; + else + return us; +} + +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) +{ + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + strcpy(buf, "func "); + break; + + case IPIPE_TRACE_BEGIN: + strcpy(buf, "begin "); + break; + + case IPIPE_TRACE_END: + strcpy(buf, "end "); + break; + + case IPIPE_TRACE_FREEZE: + strcpy(buf, "freeze "); + break; + + case IPIPE_TRACE_SPECIAL: + sprintf(buf, "(0x%02x) ", + point->type >> IPIPE_TYPE_BITS); + break; + + case IPIPE_TRACE_PID: + sprintf(buf, "[%5d] ", (pid_t)point->v); + break; + + case IPIPE_TRACE_EVENT: + sprintf(buf, "event "); + break; + } +} + +static void +__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) +{ + char mark = ' '; + int point_no = point - print_path->point; + int i; + + if (print_path->end == point_no) + mark = '<'; + else if (print_path->begin == point_no) + mark = '>'; + else if (__ipipe_in_critical_trpath(point_no)) + mark = ':'; + seq_printf(m, "%c%c", mark, + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + if (!verbose_trace) + return; + + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) + seq_printf(m, "%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); +} + +static void +__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) +{ + unsigned long delay = 0; + int next; + char *mark = " "; + + next = WRAP_POINT_NO(point+1 - print_path->point); + + if (next != print_path->trace_pos) + delay = ipipe_tsc2ns(print_path->point[next].timestamp - + point->timestamp); + + if (__ipipe_in_critical_trpath(point - print_path->point)) { + if (delay > IPIPE_DELAY_WARN) + mark = "! "; + else if (delay > IPIPE_DELAY_NOTE) + mark = "+ "; + } + seq_puts(m, mark); + + if (verbose_trace) + seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, + (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); + else + seq_puts(m, " "); +} + +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + +#ifdef CONFIG_IPIPE_TRACE_PANIC + if (!m) { + /* panic dump */ + if (sym_name) { + printk("%s+0x%lx", sym_name, offset); + if (modname) + printk(" [%s]", modname); + } else + printk("<%08lx>", eip); + } else +#endif /* CONFIG_IPIPE_TRACE_PANIC */ + { + if (sym_name) { + if (verbose_trace) { + seq_printf(m, "%s+0x%lx", sym_name, offset); + if (modname) + seq_printf(m, " [%s]", modname); + } else + seq_puts(m, sym_name); + } else + seq_printf(m, "<%08lx>", eip); + } +} + +static void __ipipe_print_headline(struct seq_file *m) +{ + const char *name[2]; + + seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " + "us\n\n", trace_overhead/1000, trace_overhead%1000); + + if (verbose_trace) { + name[0] = ipipe_root_domain->name; + if (ipipe_head_domain != ipipe_root_domain) + name[1] = ipipe_head_domain->name; + else + name[1] = ""; + + seq_printf(m, + " +----- Hard IRQs ('|': locked)\n" + " |+-- %s\n" + " ||+- %s%s\n" + " ||| +---------- " + "Delay flag ('+': > %d us, '!': > %d us)\n" + " ||| | +- " + "NMI noise ('N')\n" + " ||| | |\n" + " Type User Val. 
Time Delay Function " + "(Parent)\n", + name[1], name[0], + " ('*': domain stalled, '+': current, " + "'#': current+stalled)", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); + } else + seq_printf(m, + " +--------------- Hard IRQs ('|': locked)\n" + " | +- Delay flag " + "('+': > %d us, '!': > %d us)\n" + " | |\n" + " Type Time Function (Parent)\n", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); +} + +static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + + mutex_lock(&out_mutex); + + if (!n) { + struct ipipe_trace_path *tp; + unsigned long length_usecs; + int points, cpu; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags = __ipipe_global_path_lock(); + + /* find the longest of all per-cpu paths */ + print_path = NULL; + for_each_online_cpu(cpu) { + tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; + if ((print_path == NULL) || + (tp->length > print_path->length)) { + print_path = tp; + break; + } + } + print_path->dump_lock = 1; + + __ipipe_global_path_unlock(flags); + + if (!__ipipe_hrclock_ok()) { + seq_printf(m, "No hrclock available, dumping traces disabled\n"); + return NULL; + } + + /* does this path actually contain data? */ + if (print_path->end == print_path->begin) + return NULL; + + /* number of points inside the critical path */ + points = WRAP_POINT_NO(print_path->end-print_path->begin+1); + + /* pre- and post-tracing length, post-trace length was frozen + in __ipipe_trace, pre-trace may have to be reduced due to + buffer overrun */ + print_pre_trace = pre_trace; + print_post_trace = WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) + print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - + print_post_trace; + + length_usecs = ipipe_tsc2us(print_path->length); + seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe release #%d\n" + "-------------------------------------------------------------\n", + UTS_RELEASE, IPIPE_CORE_RELEASE); + seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " + "%d (-%d/+%d), Length: %lu us\n", + cpu, print_path->point[print_path->begin].timestamp, + points, print_pre_trace, print_post_trace, length_usecs); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + n)]; +} + +static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) +{ + loff_t n = ++*pos; + + /* check if we are inside the trace range with the next entry */ + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + *pos)]; +} + +static void __ipipe_prtrace_stop(struct seq_file *m, void *p) +{ + if (print_path) + print_path->dump_lock = 0; + mutex_unlock(&out_mutex); +} + +static int __ipipe_prtrace_show(struct seq_file *m, void *p) +{ + long time; + struct ipipe_trace_point *point = p; + char buf[16]; + + if (!point->eip) { + seq_puts(m, "--\n"); + return 0; + } + + __ipipe_print_pathmark(m, point); + __ipipe_trace_point_type(buf, 
point); + seq_puts(m, buf); + if (verbose_trace) + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + seq_puts(m, " "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(buf, point, 0); + seq_puts(m, buf); + break; + + case IPIPE_TRACE_EVENT: + __ipipe_get_event_date(buf, print_path, point); + seq_puts(m, buf); + break; + + default: + seq_printf(m, "0x%08lx ", point->v); + } + + time = __ipipe_signed_tsc2us(point->timestamp - + print_path->point[print_path->begin].timestamp); + seq_printf(m, "%5ld", time); + + __ipipe_print_delay(m, point); + __ipipe_print_symname(m, point->eip); + seq_puts(m, " ("); + __ipipe_print_symname(m, point->parent_eip); + seq_puts(m, ")\n"); + + return 0; +} + +static struct seq_operations __ipipe_max_ptrace_ops = { + .start = __ipipe_max_prtrace_start, + .next = __ipipe_prtrace_next, + .stop = __ipipe_prtrace_stop, + .show = __ipipe_prtrace_show +}; + +static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &__ipipe_max_ptrace_ops); +} + +static ssize_t +__ipipe_max_reset(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + mutex_lock(&out_mutex); + ipipe_trace_max_reset(); + mutex_unlock(&out_mutex); + + return count; +} + +static const struct file_operations __ipipe_max_prtrace_fops = { + .open = __ipipe_max_prtrace_open, + .read = seq_read, + .write = __ipipe_max_reset, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + + mutex_lock(&out_mutex); + + if (!n) { + struct ipipe_trace_path *tp; + int cpu; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags = __ipipe_global_path_lock(); + + /* find the first of all per-cpu frozen paths */ + print_path = NULL; + for_each_online_cpu(cpu) { + tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; + if (tp->end >= 0) { + print_path = tp; + break; + } + } + if (print_path) + print_path->dump_lock = 1; + + __ipipe_global_path_unlock(flags); + + if (!print_path) + return NULL; + + if (!__ipipe_hrclock_ok()) { + seq_printf(m, "No hrclock available, dumping traces disabled\n"); + return NULL; + } + + /* back- and post-tracing length, post-trace length was frozen + in __ipipe_trace, back-trace may have to be reduced due to + buffer overrun */ + print_pre_trace = back_trace-1; /* substract freeze point */ + print_post_trace = WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) + print_pre_trace = IPIPE_TRACE_POINTS - 2 - + print_post_trace; + + seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe release #%d\n" + "------------------------------------------------------------\n", + UTS_RELEASE, IPIPE_CORE_RELEASE); + seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n", + cpu, print_path->point[print_path->begin].timestamp, + print_pre_trace+1, print_post_trace); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >= print_pre_trace + 1 + print_post_trace) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin- + print_pre_trace+n)]; +} + +static struct seq_operations __ipipe_frozen_ptrace_ops = { + .start = __ipipe_frozen_prtrace_start, + .next = __ipipe_prtrace_next, + .stop = 
__ipipe_prtrace_stop, + .show = __ipipe_prtrace_show +}; + +static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &__ipipe_frozen_ptrace_ops); +} + +static ssize_t +__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; + + if (copy_from_user(buf, pbuffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + ipipe_trace_frozen_reset(); + if (val > 0) + ipipe_trace_freeze(-1); + mutex_unlock(&out_mutex); + + return count; +} + +static const struct file_operations __ipipe_frozen_prtrace_fops = { + .open = __ipipe_frozen_prtrace_open, + .read = seq_read, + .write = __ipipe_frozen_ctrl, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __ipipe_rd_proc_val(struct seq_file *p, void *data) +{ + seq_printf(p, "%u\n", *(int *)p->private); + return 0; +} + +static ssize_t +__ipipe_wr_proc_val(struct file *file, const char __user *buffer, + size_t count, loff_t *data) +{ + struct seq_file *p = file->private_data; + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; + + if (copy_from_user(buf, buffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + *(int *)p->private = val; + mutex_unlock(&out_mutex); + + return count; +} + +static int __ipipe_rw_proc_val_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_rd_proc_val, PDE_DATA(inode)); +} + +static const struct file_operations __ipipe_rw_proc_val_ops = { + .open = __ipipe_rw_proc_val_open, + .read = seq_read, + .write = __ipipe_wr_proc_val, + .llseek = seq_lseek, + .release = single_release, +}; + +static void __init +__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, + const char *name, int *value_ptr) +{ + proc_create_data(name, 0644, trace_dir, &__ipipe_rw_proc_val_ops, + value_ptr); +} + +static int __ipipe_rd_trigger(struct seq_file *p, void *data) +{ + char str[KSYM_SYMBOL_LEN]; + + if (trigger_begin) { + sprint_symbol(str, trigger_begin); + seq_printf(p, "%s\n", str); + } + return 0; +} + +static ssize_t +__ipipe_wr_trigger(struct file *file, const char __user *buffer, + size_t count, loff_t *data) +{ + char buf[KSYM_SYMBOL_LEN]; + unsigned long begin, end; + + if (count > sizeof(buf) - 1) + count = sizeof(buf) - 1; + if (copy_from_user(buf, buffer, count)) + return -EFAULT; + buf[count] = 0; + if (buf[count-1] == '\n') + buf[count-1] = 0; + + begin = kallsyms_lookup_name(buf); + if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL)) + return -ENOENT; + end += begin - 1; + + mutex_lock(&out_mutex); + /* invalidate the current range before setting a new one */ + trigger_end = 0; + wmb(); + ipipe_trace_frozen_reset(); + + /* set new range */ + trigger_begin = begin; + wmb(); + trigger_end = end; + mutex_unlock(&out_mutex); + + return count; +} + +static int __ipipe_rw_trigger_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_rd_trigger, NULL); +} + +static const struct file_operations __ipipe_rw_trigger_ops = { + .open = __ipipe_rw_trigger_open, + .read = seq_read, + .write = __ipipe_wr_trigger, + .llseek = seq_lseek, + .release = 
single_release, +}; + + +#ifdef CONFIG_IPIPE_TRACE_MCOUNT +static void notrace +ipipe_trace_function(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0); +} + +static struct ftrace_ops ipipe_trace_ops = { + .func = ipipe_trace_function, + .flags = FTRACE_OPS_FL_IPIPE_EXCLUSIVE, +}; + +static ssize_t __ipipe_wr_enable(struct file *file, const char __user *buffer, + size_t count, loff_t *data) +{ + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; + + if (copy_from_user(buf, buffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + + if (ipipe_trace_enable) { + if (!val) + unregister_ftrace_function(&ipipe_trace_ops); + } else if (val) + register_ftrace_function(&ipipe_trace_ops); + + ipipe_trace_enable = val; + + mutex_unlock(&out_mutex); + + return count; +} + +static const struct file_operations __ipipe_rw_enable_ops = { + .open = __ipipe_rw_proc_val_open, + .read = seq_read, + .write = __ipipe_wr_enable, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ + +extern struct proc_dir_entry *ipipe_proc_root; + +void __init __ipipe_tracer_hrclock_initialized(void) +{ + unsigned long long start, end, min = ULLONG_MAX; + int i; + +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + if (!per_cpu(trace_path, 0)) + return; +#endif + /* Calculate minimum overhead of __ipipe_trace() */ + hard_local_irq_disable(); + for (i = 0; i < 100; i++) { + ipipe_read_tsc(start); + __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, 0); + ipipe_read_tsc(end); + + end -= start; + if (end < min) + min = end; + } + hard_local_irq_enable(); + trace_overhead = ipipe_tsc2ns(min); +} + +void __init __ipipe_init_tracer(void) +{ + struct proc_dir_entry *trace_dir; +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + int cpu, path; +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + for_each_possible_cpu(cpu) { + struct ipipe_trace_path *tp_buf; + + tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) * + IPIPE_TRACE_PATHS, cpu_to_node(cpu)); + if (!tp_buf) { + pr_err("I-pipe: " + "insufficient memory for trace buffer.\n"); + return; + } + memset(tp_buf, 0, + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); + for (path = 0; path < IPIPE_TRACE_PATHS; path++) { + tp_buf[path].begin = -1; + tp_buf[path].end = -1; + } + per_cpu(trace_path, cpu) = tp_buf; + } +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + + if (__ipipe_hrclock_ok() && !trace_overhead) + __ipipe_tracer_hrclock_initialized(); + +#ifdef CONFIG_IPIPE_TRACE_ENABLE + ipipe_trace_enable = 1; +#ifdef CONFIG_IPIPE_TRACE_MCOUNT + ftrace_enabled = 1; + register_ftrace_function(&ipipe_trace_ops); +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ +#endif /* CONFIG_IPIPE_TRACE_ENABLE */ + + trace_dir = proc_mkdir("trace", ipipe_proc_root); + + proc_create("max", 0644, trace_dir, &__ipipe_max_prtrace_fops); + proc_create("frozen", 0644, trace_dir, &__ipipe_frozen_prtrace_fops); + + proc_create("trigger", 0644, trace_dir, &__ipipe_rw_trigger_ops); + + __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", + &pre_trace); + __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", + &post_trace); + __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", + &back_trace); + 
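	/*
	 * A possible user-space session driving the knobs created here,
	 * assuming ipipe_proc_root maps to /proc/ipipe (paths shown for
	 * illustration only; the trigger needs function tracing enabled):
	 *
	 *	echo 1 > /proc/ipipe/trace/enable        # arm the tracer
	 *	echo symbol > /proc/ipipe/trace/trigger  # freeze when symbol runs
	 *	cat /proc/ipipe/trace/frozen             # dump the frozen back-trace
	 *	echo 0 > /proc/ipipe/trace/frozen        # reset the frozen path
	 *	cat /proc/ipipe/trace/max                # dump the worst-case path
	 *
	 * Any write to "max" resets the worst-case path, and a positive
	 * value written to "frozen" re-freezes right after the reset (see
	 * __ipipe_max_reset() and __ipipe_frozen_ctrl() above).
	 */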
__ipipe_create_trace_proc_val(trace_dir, "verbose", + &verbose_trace); +#ifdef CONFIG_IPIPE_TRACE_MCOUNT + proc_create_data("enable", 0644, trace_dir, &__ipipe_rw_enable_ops, + &ipipe_trace_enable); +#else /* !CONFIG_IPIPE_TRACE_MCOUNT */ + __ipipe_create_trace_proc_val(trace_dir, "enable", + &ipipe_trace_enable); +#endif /* !CONFIG_IPIPE_TRACE_MCOUNT */ +} diff -ruN linux-org/kernel/irq/chip.c linux/kernel/irq/chip.c --- linux-org/kernel/irq/chip.c 2022-03-25 09:55:42.681450513 +0100 +++ linux/kernel/irq/chip.c 2022-03-25 10:15:23.361002980 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -50,6 +51,10 @@ if (!chip) chip = &no_irq_chip; + else + WARN_ONCE(IS_ENABLED(CONFIG_IPIPE) && + (chip->flags & IRQCHIP_PIPELINE_SAFE) == 0, + "irqchip %s is not pipeline-safe!", chip->name); desc->irq_data.chip = chip; irq_put_desc_unlock(desc, flags); @@ -157,14 +162,6 @@ } EXPORT_SYMBOL(irq_set_chip_data); -struct irq_data *irq_get_irq_data(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - return desc ? &desc->irq_data : NULL; -} -EXPORT_SYMBOL_GPL(irq_get_irq_data); - static void irq_state_clr_disabled(struct irq_desc *desc) { irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); @@ -238,9 +235,14 @@ irq_domain_activate_irq(d); if (d->chip->irq_startup) { + unsigned long flags = hard_cond_local_irq_save(); ret = d->chip->irq_startup(d); irq_state_clr_disabled(desc); irq_state_clr_masked(desc); + hard_cond_local_irq_restore(flags); +#ifdef CONFIG_IPIPE + desc->istate &= ~IPIPE_IRQS_NEEDS_STARTUP; +#endif } else { irq_enable(desc); } @@ -288,6 +290,9 @@ desc->irq_data.chip->irq_shutdown(&desc->irq_data); irq_state_set_disabled(desc); irq_state_set_masked(desc); +#ifdef CONFIG_IPIPE + desc->istate |= IPIPE_IRQS_NEEDS_STARTUP; +#endif } else { __irq_disable(desc, true); } @@ -304,6 +309,8 @@ void irq_enable(struct irq_desc *desc) { + unsigned long flags = hard_cond_local_irq_save(); + if (!irqd_irq_disabled(&desc->irq_data)) { unmask_irq(desc); } else { @@ -315,10 +322,14 @@ unmask_irq(desc); } } + + hard_cond_local_irq_restore(flags); } static void __irq_disable(struct irq_desc *desc, bool mask) { + unsigned long flags = hard_cond_local_irq_save(); + if (irqd_irq_disabled(&desc->irq_data)) { if (mask) mask_irq(desc); @@ -331,6 +342,8 @@ mask_irq(desc); } } + + hard_cond_local_irq_restore(flags); } /** @@ -360,11 +373,13 @@ void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) { + unsigned long flags = hard_cond_local_irq_save(); if (desc->irq_data.chip->irq_enable) desc->irq_data.chip->irq_enable(&desc->irq_data); else desc->irq_data.chip->irq_unmask(&desc->irq_data); cpumask_set_cpu(cpu, desc->percpu_enabled); + hard_cond_local_irq_restore(flags); } void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu) @@ -401,12 +416,16 @@ void unmask_irq(struct irq_desc *desc) { + unsigned long flags; + if (!irqd_irq_masked(&desc->irq_data)) return; if (desc->irq_data.chip->irq_unmask) { + flags = hard_cond_local_irq_save(); desc->irq_data.chip->irq_unmask(&desc->irq_data); irq_state_clr_masked(desc); + hard_cond_local_irq_restore(flags); } } @@ -603,7 +622,9 @@ void handle_level_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); +#ifndef CONFIG_IPIPE mask_ack_irq(desc); +#endif if (!irq_may_run(desc)) goto out_unlock; @@ -639,7 +660,16 @@ static inline void preflow_handler(struct irq_desc *desc) { } #endif -static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) +#ifdef CONFIG_IPIPE +static void cond_release_fasteoi_irq(struct 
irq_desc *desc, + struct irq_chip *chip) +{ + if (chip->irq_release && + !irqd_irq_disabled(&desc->irq_data) && !desc->threads_oneshot) + chip->irq_release(&desc->irq_data); +} +#else +static inline void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) { if (!(desc->istate & IRQS_ONESHOT)) { chip->irq_eoi(&desc->irq_data); @@ -659,6 +689,7 @@ chip->irq_eoi(&desc->irq_data); } } +#endif /* !CONFIG_IPIPE */ /** * handle_fasteoi_irq - irq handler for transparent controllers @@ -691,13 +722,23 @@ } kstat_incr_irqs_this_cpu(desc); +#ifndef CONFIG_IPIPE if (desc->istate & IRQS_ONESHOT) mask_irq(desc); +#endif preflow_handler(desc); handle_irq_event(desc); +#ifdef CONFIG_IPIPE + /* + * IRQCHIP_EOI_IF_HANDLED is ignored as the I-pipe always + * sends EOI. + */ + cond_release_fasteoi_irq(desc, chip); +#else /* !CONFIG_IPIPE */ cond_unmask_eoi_irq(desc, chip); +#endif /* !CONFIG_IPIPE */ raw_spin_unlock(&desc->lock); return; @@ -748,7 +789,9 @@ kstat_incr_irqs_this_cpu(desc); /* Start handling the irq */ +#ifndef CONFIG_IPIPE desc->irq_data.chip->irq_ack(&desc->irq_data); +#endif do { if (unlikely(!desc->action)) { @@ -836,6 +879,11 @@ kstat_incr_irqs_this_cpu(desc); +#ifdef CONFIG_IPIPE + (void)chip; + handle_irq_event_percpu(desc); + desc->ipipe_end(desc); +#else if (chip->irq_ack) chip->irq_ack(&desc->irq_data); @@ -843,6 +891,7 @@ if (chip->irq_eoi) chip->irq_eoi(&desc->irq_data); +#endif } /** @@ -865,13 +914,20 @@ kstat_incr_irqs_this_cpu(desc); +#ifndef CONFIG_IPIPE if (chip->irq_ack) chip->irq_ack(&desc->irq_data); +#endif if (likely(action)) { trace_irq_handler_entry(irq, action); res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); trace_irq_handler_exit(irq, action, res); +#ifdef CONFIG_IPIPE + (void)chip; + desc->ipipe_end(desc); + return; +#endif } else { unsigned int cpu = smp_processor_id(); bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); @@ -887,6 +943,156 @@ chip->irq_eoi(&desc->irq_data); } +#ifdef CONFIG_IPIPE + +void __ipipe_ack_level_irq(struct irq_desc *desc) +{ + mask_ack_irq(desc); +} + +void __ipipe_end_level_irq(struct irq_desc *desc) +{ + desc->irq_data.chip->irq_unmask(&desc->irq_data); +} + +void __ipipe_ack_fasteoi_irq(struct irq_desc *desc) +{ + desc->irq_data.chip->irq_hold(&desc->irq_data); +} + +void __ipipe_end_fasteoi_irq(struct irq_desc *desc) +{ + if (desc->irq_data.chip->irq_release) + desc->irq_data.chip->irq_release(&desc->irq_data); +} + +void __ipipe_ack_edge_irq(struct irq_desc *desc) +{ + desc->irq_data.chip->irq_ack(&desc->irq_data); +} + +void __ipipe_ack_percpu_irq(struct irq_desc *desc) +{ + if (desc->irq_data.chip->irq_ack) + desc->irq_data.chip->irq_ack(&desc->irq_data); + + if (desc->irq_data.chip->irq_eoi) + desc->irq_data.chip->irq_eoi(&desc->irq_data); +} + +void __ipipe_nop_irq(struct irq_desc *desc) +{ +} + +void __ipipe_chained_irq(struct irq_desc *desc) +{ + /* + * XXX: Do NOT fold this into __ipipe_nop_irq(), see + * ipipe_chained_irq_p(). 
+ */ +} + +static void __ipipe_ack_bad_irq(struct irq_desc *desc) +{ + handle_bad_irq(desc); + WARN_ON_ONCE(1); +} + +irq_flow_handler_t +__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) +{ + if (unlikely(handle == NULL)) { + desc->ipipe_ack = __ipipe_ack_bad_irq; + desc->ipipe_end = __ipipe_nop_irq; + } else { + if (is_chained) { + desc->ipipe_ack = handle; + desc->ipipe_end = __ipipe_nop_irq; + handle = __ipipe_chained_irq; + } else if (handle == handle_simple_irq) { + desc->ipipe_ack = __ipipe_nop_irq; + desc->ipipe_end = __ipipe_nop_irq; + } else if (handle == handle_level_irq) { + desc->ipipe_ack = __ipipe_ack_level_irq; + desc->ipipe_end = __ipipe_end_level_irq; + } else if (handle == handle_edge_irq) { + desc->ipipe_ack = __ipipe_ack_edge_irq; + desc->ipipe_end = __ipipe_nop_irq; + } else if (handle == handle_fasteoi_irq) { + desc->ipipe_ack = __ipipe_ack_fasteoi_irq; + desc->ipipe_end = __ipipe_end_fasteoi_irq; + } else if (handle == handle_percpu_irq || + handle == handle_percpu_devid_irq) { + if (irq_desc_get_chip(desc) && + irq_desc_get_chip(desc)->irq_hold) { + desc->ipipe_ack = __ipipe_ack_fasteoi_irq; + desc->ipipe_end = __ipipe_end_fasteoi_irq; + } else { + desc->ipipe_ack = __ipipe_ack_percpu_irq; + desc->ipipe_end = __ipipe_nop_irq; + } + } else if (irq_desc_get_chip(desc) == &no_irq_chip) { + desc->ipipe_ack = __ipipe_nop_irq; + desc->ipipe_end = __ipipe_nop_irq; + } else { + desc->ipipe_ack = __ipipe_ack_bad_irq; + desc->ipipe_end = __ipipe_nop_irq; + } + } + + /* Suppress intermediate trampoline routine. */ + ipipe_root_domain->irqs[desc->irq_data.irq].ackfn = desc->ipipe_ack; + + return handle; +} + +void ipipe_enable_irq(unsigned int irq) +{ + struct irq_desc *desc; + struct irq_chip *chip; + unsigned long flags; + + desc = irq_to_desc(irq); + if (desc == NULL) + return; + + chip = irq_desc_get_chip(desc); + + if (chip->irq_startup && (desc->istate & IPIPE_IRQS_NEEDS_STARTUP)) { + + ipipe_root_only(); + + raw_spin_lock_irqsave(&desc->lock, flags); + if (desc->istate & IPIPE_IRQS_NEEDS_STARTUP) { + desc->istate &= ~IPIPE_IRQS_NEEDS_STARTUP; + chip->irq_startup(&desc->irq_data); + } + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return; + } + + if (WARN_ON_ONCE(chip->irq_enable == NULL && chip->irq_unmask == NULL)) + return; + + if (chip->irq_enable) + chip->irq_enable(&desc->irq_data); + else + chip->irq_unmask(&desc->irq_data); +} +EXPORT_SYMBOL_GPL(ipipe_enable_irq); + +#else /* !CONFIG_IPIPE */ + +irq_flow_handler_t +__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) +{ + return handle; +} + +#endif /* !CONFIG_IPIPE */ +EXPORT_SYMBOL_GPL(__fixup_irq_handler); + static void __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained, const char *name) @@ -921,6 +1127,8 @@ return; } + handle = __fixup_irq_handler(desc, handle, is_chained); + /* Uninstall? 
*/ if (handle == handle_bad_irq) { if (desc->irq_data.chip != &no_irq_chip) @@ -1256,6 +1464,20 @@ } EXPORT_SYMBOL_GPL(irq_chip_mask_parent); +#ifdef CONFIG_IPIPE +void irq_chip_hold_parent(struct irq_data *data) +{ + data = data->parent_data; + data->chip->irq_hold(data); +} + +void irq_chip_release_parent(struct irq_data *data) +{ + data = data->parent_data; + data->chip->irq_release(data); +} +#endif + /** * irq_chip_unmask_parent - Unmask the parent interrupt * @data: Pointer to interrupt specific data diff -ruN linux-org/kernel/irq/dummychip.c linux/kernel/irq/dummychip.c --- linux-org/kernel/irq/dummychip.c 2022-03-25 09:55:42.681450513 +0100 +++ linux/kernel/irq/dummychip.c 2022-03-25 10:15:23.361002980 +0100 @@ -42,7 +42,7 @@ .irq_enable = noop, .irq_disable = noop, .irq_ack = ack_bad, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, }; /* @@ -58,6 +58,6 @@ .irq_ack = noop, .irq_mask = noop, .irq_unmask = noop, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, }; EXPORT_SYMBOL_GPL(dummy_irq_chip); diff -ruN linux-org/kernel/irq/generic-chip.c linux/kernel/irq/generic-chip.c --- linux-org/kernel/irq/generic-chip.c 2022-03-25 09:55:42.681450513 +0100 +++ linux/kernel/irq/generic-chip.c 2022-03-25 10:15:23.361002980 +0100 @@ -36,12 +36,13 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; u32 mask = d->mask; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); irq_reg_writel(gc, mask, ct->regs.disable); *ct->mask_cache &= ~mask; - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } /** @@ -55,12 +56,13 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; u32 mask = d->mask; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); *ct->mask_cache |= mask; irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit); @@ -75,12 +77,13 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; u32 mask = d->mask; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); *ct->mask_cache &= ~mask; irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit); @@ -95,12 +98,13 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; u32 mask = d->mask; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); irq_reg_writel(gc, mask, ct->regs.enable); *ct->mask_cache |= mask; - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } /** @@ -111,11 +115,12 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; u32 mask = d->mask; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit); @@ -127,11 +132,12 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; u32 mask = ~d->mask; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } /** @@ -150,13 +156,14 @@ { struct irq_chip_generic *gc 
= irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; u32 mask = d->mask; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); irq_reg_writel(gc, mask, ct->regs.disable); *ct->mask_cache &= ~mask; irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } /** @@ -167,11 +174,12 @@ { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct irq_chip_type *ct = irq_data_get_chip_type(d); + unsigned long flags; u32 mask = d->mask; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); irq_reg_writel(gc, mask, ct->regs.eoi); - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); } /** @@ -186,17 +194,18 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on) { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + unsigned long flags; u32 mask = d->mask; if (!(mask & gc->wake_enabled)) return -EINVAL; - irq_gc_lock(gc); + flags = irq_gc_lock(gc); if (on) gc->wake_active |= mask; else gc->wake_active &= ~mask; - irq_gc_unlock(gc); + irq_gc_unlock(gc, flags); return 0; } diff -ruN linux-org/kernel/irq/internals.h linux/kernel/irq/internals.h --- linux-org/kernel/irq/internals.h 2022-03-25 09:55:42.681450513 +0100 +++ linux/kernel/irq/internals.h 2022-03-25 10:15:23.361002980 +0100 @@ -60,6 +60,7 @@ IRQS_PENDING = 0x00000200, IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, + IPIPE_IRQS_NEEDS_STARTUP= 0x80000000, }; #include "debug.h" diff -ruN linux-org/kernel/irq/irqdesc.c linux/kernel/irq/irqdesc.c --- linux-org/kernel/irq/irqdesc.c 2022-03-25 09:55:42.681450513 +0100 +++ linux/kernel/irq/irqdesc.c 2022-03-25 10:15:23.361002980 +0100 @@ -126,6 +126,9 @@ for_each_possible_cpu(cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; desc_smp_init(desc, node, affinity); +#ifdef CONFIG_IPIPE + desc->istate |= IPIPE_IRQS_NEEDS_STARTUP; +#endif } int nr_irqs = NR_IRQS; @@ -541,11 +544,13 @@ return arch_early_irq_init(); } +#ifndef CONFIG_IPIPE struct irq_desc *irq_to_desc(unsigned int irq) { return (irq < NR_IRQS) ? irq_desc + irq : NULL; } EXPORT_SYMBOL(irq_to_desc); +#endif /* CONFIG_IPIPE */ static void free_desc(unsigned int irq) { diff -ruN linux-org/kernel/irq/manage.c linux/kernel/irq/manage.c --- linux-org/kernel/irq/manage.c 2022-03-25 09:55:42.681450513 +0100 +++ linux/kernel/irq/manage.c 2022-03-25 10:15:23.361002980 +0100 @@ -818,9 +818,14 @@ desc->threads_oneshot &= ~action->thread_mask; +#ifndef CONFIG_IPIPE if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && irqd_irq_masked(&desc->irq_data)) unmask_threaded_irq(desc); +#else /* CONFIG_IPIPE */ + if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data)) + desc->ipipe_end(desc); +#endif /* CONFIG_IPIPE */ out_unlock: raw_spin_unlock_irq(&desc->lock); diff -ruN linux-org/kernel/locking/lockdep.c linux/kernel/locking/lockdep.c --- linux-org/kernel/locking/lockdep.c 2022-03-25 09:55:42.689450483 +0100 +++ linux/kernel/locking/lockdep.c 2022-03-25 10:15:23.361002980 +0100 @@ -2886,6 +2886,9 @@ __visible void trace_hardirqs_on_caller(unsigned long ip) { + if (!ipipe_root_p) + return; + time_hardirqs_on(CALLER_ADDR0, ip); if (unlikely(!debug_locks || current->lockdep_recursion)) @@ -2906,7 +2909,7 @@ * already enabled, yet we find the hardware thinks they are in fact * enabled.. someone messed up their IRQ state tracing. 
*/ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) return; /* @@ -2939,7 +2942,12 @@ */ __visible void trace_hardirqs_off_caller(unsigned long ip) { - struct task_struct *curr = current; + struct task_struct *curr; + + if (!ipipe_root_p) + return; + + curr = current; time_hardirqs_off(CALLER_ADDR0, ip); @@ -2950,7 +2958,7 @@ * So we're supposed to get called after you mask local IRQs, but for * some reason the hardware doesn't quite think you did a proper job. */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) return; if (curr->hardirqs_enabled) { @@ -2986,7 +2994,7 @@ * We fancy IRQs being disabled here, see softirq.c, avoids * funny state and nesting things. */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) return; if (curr->softirqs_enabled) { @@ -3025,7 +3033,7 @@ /* * We fancy IRQs being disabled here, see softirq.c */ - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) return; if (curr->softirqs_enabled) { diff -ruN linux-org/kernel/locking/spinlock.c linux/kernel/locking/spinlock.c --- linux-org/kernel/locking/spinlock.c 2022-03-25 09:55:42.697450453 +0100 +++ linux/kernel/locking/spinlock.c 2022-03-25 10:15:23.361002980 +0100 @@ -27,7 +27,9 @@ * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are * not re-enabled during lock-acquire (which the preempt-spin-ops do): */ -#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) +#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ + defined(CONFIG_DEBUG_LOCK_ALLOC) || \ + defined(CONFIG_IPIPE) /* * The __lock_function inlines are taken from * include/linux/spinlock_api_smp.h diff -ruN linux-org/kernel/Makefile linux/kernel/Makefile --- linux-org/kernel/Makefile 2022-03-25 09:55:42.653450618 +0100 +++ linux/kernel/Makefile 2022-03-25 10:15:23.357002995 +0100 @@ -87,6 +87,7 @@ obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o +obj-$(CONFIG_IPIPE) += ipipe/ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o diff -ruN linux-org/kernel/module.c linux/kernel/module.c --- linux-org/kernel/module.c 2022-03-25 09:55:42.697450453 +0100 +++ linux/kernel/module.c 2022-03-25 10:15:23.361002980 +0100 @@ -1109,7 +1109,7 @@ bool ret = true; if (module) { - preempt_disable(); + unsigned long flags = hard_preempt_disable(); /* Note: here, we can fail to get a reference */ if (likely(module_is_live(module) && atomic_inc_not_zero(&module->refcnt) != 0)) @@ -1117,7 +1117,7 @@ else ret = false; - preempt_enable(); + hard_preempt_enable(flags); } return ret; } @@ -1128,11 +1128,11 @@ int ret; if (module) { - preempt_disable(); + unsigned long flags = hard_preempt_disable(); ret = atomic_dec_if_positive(&module->refcnt); WARN_ON(ret < 0); /* Failed to put refcount */ trace_module_put(module, _RET_IP_); - preempt_enable(); + hard_preempt_enable(flags); } } EXPORT_SYMBOL(module_put); diff -ruN linux-org/kernel/panic.c linux/kernel/panic.c --- linux-org/kernel/panic.c 2022-03-25 09:55:42.701450437 +0100 +++ linux/kernel/panic.c 2022-03-25 10:15:23.361002980 +0100 @@ -19,8 +19,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -471,6 +473,8 @@ { tracing_off(); /* can't trust 
the integrity of the kernel anymore: */ + ipipe_trace_panic_freeze(); + ipipe_disable_context_check(); debug_locks_off(); do_oops_enter_exit(); } diff -ruN linux-org/kernel/power/hibernate.c linux/kernel/power/hibernate.c --- linux-org/kernel/power/hibernate.c 2022-03-25 09:55:42.701450437 +0100 +++ linux/kernel/power/hibernate.c 2022-03-25 10:15:23.361002980 +0100 @@ -286,6 +286,7 @@ goto Enable_cpus; local_irq_disable(); + hard_cond_local_irq_disable(); error = syscore_suspend(); if (error) { @@ -445,6 +446,7 @@ goto Enable_cpus; local_irq_disable(); + hard_cond_local_irq_disable(); error = syscore_suspend(); if (error) @@ -563,6 +565,7 @@ goto Enable_cpus; local_irq_disable(); + hard_cond_local_irq_disable(); syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; diff -ruN linux-org/kernel/printk/printk.c linux/kernel/printk/printk.c --- linux-org/kernel/printk/printk.c 2022-03-25 09:55:42.705450423 +0100 +++ linux/kernel/printk/printk.c 2022-03-25 10:15:23.361002980 +0100 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -1799,6 +1800,65 @@ } EXPORT_SYMBOL_GPL(vprintk_default); +#ifdef CONFIG_IPIPE + +extern int __ipipe_printk_bypass; + +static IPIPE_DEFINE_SPINLOCK(__ipipe_printk_lock); + +static int __ipipe_printk_fill; + +static char __ipipe_printk_buf[__LOG_BUF_LEN]; + +int __ipipe_log_printk(const char *fmt, va_list args) +{ + int ret = 0, fbytes, oldcount; + unsigned long flags; + + raw_spin_lock_irqsave(&__ipipe_printk_lock, flags); + + oldcount = __ipipe_printk_fill; + fbytes = __LOG_BUF_LEN - oldcount; + if (fbytes > 1) { + ret = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, + fbytes, fmt, args) + 1; + __ipipe_printk_fill += ret; + } + + raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); + + if (oldcount == 0) + ipipe_raise_irq(__ipipe_printk_virq); + + return ret; +} + +void __ipipe_flush_printk (unsigned virq, void *cookie) +{ + char *p = __ipipe_printk_buf; + int len, lmax, out = 0; + unsigned long flags; + + goto start; + do { + raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); +start: + lmax = __ipipe_printk_fill; + while (out < lmax) { + len = strlen(p) + 1; + printk("%s",p); + p += len; + out += len; + } + raw_spin_lock_irqsave(&__ipipe_printk_lock, flags); + } + while (__ipipe_printk_fill != lmax); + + __ipipe_printk_fill = 0; + + raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); +} + /** * printk - print a kernel message * @fmt: format string @@ -1820,6 +1880,44 @@ * * See the vsnprintf() documentation for format string extensions over C99. */ + +asmlinkage __visible int printk(const char *fmt, ...) +{ + int sprintk = 1, cs = -1; + unsigned long flags; + va_list args; + int ret; + + va_start(args, fmt); + + flags = hard_local_irq_save(); + + if (__ipipe_printk_bypass || oops_in_progress) + cs = ipipe_disable_context_check(); + else if (__ipipe_current_domain == ipipe_root_domain) { + if (ipipe_head_domain != ipipe_root_domain && + (raw_irqs_disabled_flags(flags) || + test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) + sprintk = 0; + } else + sprintk = 0; + + hard_local_irq_restore(flags); + + if (sprintk) { + ret = vprintk_func(fmt, args); + if (cs != -1) + ipipe_restore_context_check(cs); + } else + ret = __ipipe_log_printk(fmt, args); + + va_end(args); + + return ret; +} + +#else /* !CONFIG_IPIPE */ + asmlinkage __visible int printk(const char *fmt, ...) 
{ va_list args; @@ -1831,6 +1929,9 @@ return r; } + +#endif /* CONFIG_IPIPE */ + EXPORT_SYMBOL(printk); #else /* CONFIG_PRINTK */ @@ -1883,6 +1984,63 @@ } #endif +#ifdef CONFIG_RAW_PRINTK +static struct console *raw_console; +static IPIPE_DEFINE_RAW_SPINLOCK(raw_console_lock); + +void raw_vprintk(const char *fmt, va_list ap) +{ + unsigned long flags; + char buf[256]; + int n; + + if (raw_console == NULL || console_suspended) + return; + + n = vscnprintf(buf, sizeof(buf), fmt, ap); + touch_nmi_watchdog(); + raw_spin_lock_irqsave(&raw_console_lock, flags); + if (raw_console) + raw_console->write_raw(raw_console, buf, n); + raw_spin_unlock_irqrestore(&raw_console_lock, flags); +} + +asmlinkage __visible void raw_printk(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + raw_vprintk(fmt, ap); + va_end(ap); +} +EXPORT_SYMBOL(raw_printk); + +static inline void register_raw_console(struct console *newcon) +{ + if ((newcon->flags & CON_RAW) != 0 && newcon->write_raw) + raw_console = newcon; +} + +static inline void unregister_raw_console(struct console *oldcon) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&raw_console_lock, flags); + if (oldcon == raw_console) + raw_console = NULL; + raw_spin_unlock_irqrestore(&raw_console_lock, flags); +} + +#else + +static inline void register_raw_console(struct console *newcon) +{ } + +static inline void unregister_raw_console(struct console *oldcon) +{ } + +#endif + static int __add_preferred_console(char *name, int idx, char *options, char *brl_options) { @@ -2511,6 +2669,9 @@ console_drivers->next = newcon; } + /* The latest raw console to register is current. */ + register_raw_console(newcon); + if (newcon->flags & CON_EXTENDED) if (!nr_ext_console_drivers++) pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); @@ -2566,6 +2727,8 @@ (console->flags & CON_BOOT) ? "boot" : "" , console->name, console->index); + unregister_raw_console(console); + res = _braille_unregister_console(console); if (res) return res; @@ -3128,6 +3291,9 @@ printk("%sHardware name: %s\n", log_lvl, dump_stack_arch_desc_str); +#ifdef CONFIG_IPIPE + printk("I-pipe domain: %s\n", ipipe_current_domain->name); +#endif print_worker_info(log_lvl, current); } diff -ruN linux-org/kernel/rcu/Kconfig.debug linux/kernel/rcu/Kconfig.debug --- linux-org/kernel/rcu/Kconfig.debug 2022-03-25 09:55:42.705450423 +0100 +++ linux/kernel/rcu/Kconfig.debug 2022-03-25 10:15:23.361002980 +0100 @@ -5,7 +5,7 @@ menu "RCU Debugging" config PROVE_RCU - def_bool PROVE_LOCKING + def_bool PROVE_LOCKING && !IPIPE config TORTURE_TEST tristate diff -ruN linux-org/kernel/sched/core.c linux/kernel/sched/core.c --- linux-org/kernel/sched/core.c 2022-03-25 09:55:42.717450377 +0100 +++ linux/kernel/sched/core.c 2022-03-25 10:15:23.365002965 +0100 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -1110,10 +1111,13 @@ } /* Can the task run on the task's current CPU? If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), new_mask)) + if (cpumask_test_cpu(task_cpu(p), new_mask)) { + __ipipe_report_setaffinity(p, task_cpu(p)); goto out; + } dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); + __ipipe_report_setaffinity(p, dest_cpu); if (task_running(rq, p) || p->state == TASK_WAKING) { struct migration_arg arg = { p, dest_cpu }; /* Need help from migration thread: drop lock and wait. */ @@ -1781,7 +1785,9 @@ * however a fair share of IPIs are still resched only so this would * somewhat pessimize the simple resched case. 
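Illustrative aside (not part of the patch): the CONFIG_RAW_PRINTK code added to printk.c above only produces output once a console registers with CON_RAW set and a ->write_raw() handler, both introduced by this patch series. A hypothetical serial console advertising that raw path (all my_uart_* names are made up) might look like:

/* Polled output, called with hard IRQs off; must not sleep or take
 * regular spinlocks. */
static void my_uart_write_raw(struct console *con,
			      const char *s, unsigned int count)
{
	while (count--)
		my_uart_poll_putchar(*s++);	/* hypothetical polling TX helper */
}

static struct console my_uart_console = {
	.name		= "ttyMY",
	.write		= my_uart_write,	/* normal printk path */
	.write_raw	= my_uart_write_raw,	/* consumed by raw_printk() */
	.flags		= CON_PRINTBUFFER | CON_RAW,
	.index		= -1,
};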
*/ +#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI irq_enter(); +#endif sched_ttwu_pending(); /* @@ -1791,7 +1797,9 @@ this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); } +#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI irq_exit(); +#endif } static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) @@ -1978,7 +1986,8 @@ */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); - if (!(p->state & state)) + if (!(p->state & state) || + (p->state & (TASK_NOWAKEUP|TASK_HARDENING))) goto out; trace_sched_waking(p); @@ -2735,6 +2744,7 @@ * PREEMPT_COUNT kernels). */ + __ipipe_complete_domain_migration(); rq = finish_task_switch(prev); balance_callback(rq); preempt_enable(); @@ -2790,6 +2800,9 @@ switch_to(prev, next, prev); barrier(); + if (unlikely(__ipipe_switch_tail())) + return NULL; + return finish_task_switch(prev); } @@ -3176,6 +3189,7 @@ */ static inline void schedule_debug(struct task_struct *prev) { + ipipe_root_only(); #ifdef CONFIG_SCHED_STACK_END_CHECK if (task_stack_end_corrupted(prev)) panic("corrupted stack end detected inside scheduler\n"); @@ -3275,7 +3289,7 @@ * * WARNING: must be called with preemption disabled! */ -static void __sched notrace __schedule(bool preempt) +static bool __sched notrace __schedule(bool preempt) { struct task_struct *prev, *next; unsigned long *switch_count; @@ -3364,12 +3378,17 @@ /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); + if (rq == NULL) + return true; /* task hijacked by head domain */ } else { + prev->state &= ~TASK_HARDENING; rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); rq_unlock_irq(rq, &rf); } balance_callback(rq); + + return false; } void __noreturn do_task_dead(void) @@ -3422,7 +3441,8 @@ sched_submit_work(tsk); do { preempt_disable(); - __schedule(false); + if (__schedule(false)) + return; sched_preempt_enable_no_resched(); } while (need_resched()); } @@ -3502,7 +3522,8 @@ */ preempt_disable_notrace(); preempt_latency_start(1); - __schedule(true); + if (__schedule(true)) + return; preempt_latency_stop(1); preempt_enable_no_resched_notrace(); @@ -3525,7 +3546,7 @@ * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. 
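Illustrative aside (not part of the patch): the __schedule() changes above let the context switch report that the outgoing task was hijacked by the head domain; __ipipe_migrate_head(), added a little further down, builds on this to move the calling task under a co-kernel scheduler. A hedged sketch of how such a caller typically looks (only __ipipe_migrate_head() comes from the patch):

static int harden_current_thread(void)
{
	int ret;

	ret = __ipipe_migrate_head();
	if (ret)
		return ret;	/* e.g. -ERESTARTSYS when a signal was pending */

	/* A zero return means current is now running over the head domain. */
	return 0;
}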
*/ - if (likely(!preemptible())) + if (likely(!preemptible() || !ipipe_root_p)) return; preempt_schedule_common(); @@ -3551,7 +3572,7 @@ { enum ctx_state prev_ctx; - if (likely(!preemptible())) + if (likely(!preemptible() || !ipipe_root_p || hard_irqs_disabled())) return; do { @@ -4213,6 +4234,7 @@ prev_class = p->sched_class; __setscheduler(rq, p, attr, pi); + __ipipe_report_setsched(p); if (queued) { /* @@ -5779,6 +5801,45 @@ && addr < (unsigned long)__sched_text_end); } +#ifdef CONFIG_IPIPE + +int __ipipe_migrate_head(void) +{ + struct task_struct *p = current; + + preempt_disable(); + + IPIPE_WARN_ONCE(__this_cpu_read(ipipe_percpu.task_hijacked) != NULL); + + __this_cpu_write(ipipe_percpu.task_hijacked, p); + set_current_state(TASK_INTERRUPTIBLE | TASK_HARDENING); + sched_submit_work(p); + if (likely(__schedule(false))) + return 0; + + BUG_ON(!signal_pending(p)); + + preempt_enable(); + return -ERESTARTSYS; +} +EXPORT_SYMBOL_GPL(__ipipe_migrate_head); + +void __ipipe_reenter_root(void) +{ + struct rq *rq; + struct task_struct *p; + + p = __this_cpu_read(ipipe_percpu.rqlock_owner); + BUG_ON(p == NULL); + ipipe_clear_thread_flag(TIP_HEAD); + rq = finish_task_switch(p); + balance_callback(rq); + preempt_enable_no_resched_notrace(); +} +EXPORT_SYMBOL_GPL(__ipipe_reenter_root); + +#endif /* CONFIG_IPIPE */ + #ifdef CONFIG_CGROUP_SCHED /* * Default task group. diff -ruN linux-org/kernel/sched/idle.c linux/kernel/sched/idle.c --- linux-org/kernel/sched/idle.c 2022-03-25 09:55:42.725450348 +0100 +++ linux/kernel/sched/idle.c 2022-03-25 10:15:23.365002965 +0100 @@ -91,11 +91,12 @@ */ void __cpuidle default_idle_call(void) { - if (current_clr_polling_and_test()) { + if (current_clr_polling_and_test() || !__ipipe_enter_cpuidle()) { local_irq_enable(); } else { stop_critical_timings(); arch_cpu_idle(); + ipipe_exit_cpuidle(); start_critical_timings(); } } diff -ruN linux-org/kernel/sched/wait.c linux/kernel/sched/wait.c --- linux-org/kernel/sched/wait.c 2022-03-25 09:55:42.729450332 +0100 +++ linux/kernel/sched/wait.c 2022-03-25 10:15:23.365002965 +0100 @@ -84,6 +84,8 @@ } else curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry); + ipipe_root_only(); + if (&curr->entry == &wq_head->head) return nr_exclusive; diff -ruN linux-org/kernel/signal.c linux/kernel/signal.c --- linux-org/kernel/signal.c 2022-03-25 09:55:42.729450332 +0100 +++ linux/kernel/signal.c 2022-03-25 10:15:23.365002965 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -661,6 +662,10 @@ void signal_wake_up_state(struct task_struct *t, unsigned int state) { set_tsk_thread_flag(t, TIF_SIGPENDING); + + /* TIF_SIGPENDING must be prior to reporting. */ + __ipipe_report_sigwake(t); + /* * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. 
We don't check t->state here because there is a race with it @@ -884,8 +889,11 @@ return 0; if (sig == SIGKILL) return 1; - if (task_is_stopped_or_traced(p)) + if (task_is_stopped_or_traced(p)) { + if (!signal_pending(p)) + __ipipe_report_sigwake(p); return 0; + } return task_curr(p) || !signal_pending(p); } diff -ruN linux-org/kernel/time/clockevents.c linux/kernel/time/clockevents.c --- linux-org/kernel/time/clockevents.c 2022-03-25 09:55:42.733450318 +0100 +++ linux/kernel/time/clockevents.c 2022-03-25 10:15:23.365002965 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include "tick-internal.h" @@ -453,6 +454,8 @@ /* Initialize state to DETACHED */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); + ipipe_host_timer_register(dev); + if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); diff -ruN linux-org/kernel/time/clocksource.c linux/kernel/time/clocksource.c --- linux-org/kernel/time/clocksource.c 2022-03-25 09:55:42.737450302 +0100 +++ linux/kernel/time/clocksource.c 2022-03-25 10:15:23.365002965 +0100 @@ -32,6 +32,7 @@ #include /* for spin_unlock_irq() using preempt_count() m68k */ #include #include +#include #include "tick-internal.h" #include "timekeeping_internal.h" @@ -177,6 +178,9 @@ u64 csnow, wdnow, cslast, wdlast, delta; int64_t wd_nsec, cs_nsec; int next_cpu, reset_pending; +#ifdef CONFIG_IPIPE + u64 wdref; +#endif spin_lock(&watchdog_lock); if (!watchdog_running) @@ -193,11 +197,24 @@ continue; } +#ifdef CONFIG_IPIPE +retry: +#endif local_irq_disable(); +#ifdef CONFIG_IPIPE + wdref = watchdog->read(watchdog); +#endif csnow = cs->read(cs); wdnow = watchdog->read(watchdog); local_irq_enable(); +#ifdef CONFIG_IPIPE + wd_nsec = clocksource_cyc2ns((wdnow - wdref) & watchdog->mask, + watchdog->mult, watchdog->shift); + if (wd_nsec > WATCHDOG_THRESHOLD) + goto retry; +#endif + /* Clocksource initialized ? 
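Illustrative aside (not part of the patch): ipipe_check_clocksource(), added to clocksource.c just below, prefers a dedicated ->ipipe_read() hook when the driver supplies one and otherwise falls back to ->read() for continuous sources with a suitable mask; the hook appears to be treated as a full 64-bit monotonic count. A clocksource advertising it could be declared roughly as follows (the my_timer_* identifiers are made up):

static void __iomem *my_timer_base;

static u64 my_timer_read(struct clocksource *cs)
{
	return readl_relaxed(my_timer_base + 0x04);	/* 32-bit counter register */
}

/* Safe to call with hard IRQs off; widens the hardware counter to 64 bit
 * through a hypothetical software extension helper. */
static u64 my_timer_ipipe_read(struct clocksource *cs)
{
	return my_timer_extend(readl_relaxed(my_timer_base + 0x04));
}

static struct clocksource my_timer_cs = {
	.name		= "my-timer",
	.rating		= 300,
	.read		= my_timer_read,
	.ipipe_read	= my_timer_ipipe_read,	/* field added by this patch series */
	.mask		= CLOCKSOURCE_MASK(32),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};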
*/ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) || atomic_read(&watchdog_reset_pending)) { @@ -678,6 +695,95 @@ } fs_initcall(clocksource_done_booting); +#ifdef CONFIG_IPIPE_WANT_CLOCKSOURCE +unsigned long long __ipipe_cs_freq; +EXPORT_SYMBOL_GPL(__ipipe_cs_freq); + +struct clocksource *__ipipe_cs; +EXPORT_SYMBOL_GPL(__ipipe_cs); + +u64 (*__ipipe_cs_read)(struct clocksource *cs); +u64 __ipipe_cs_last_tsc; +u64 __ipipe_cs_mask; +unsigned __ipipe_cs_lat = 0xffffffff; + +static void ipipe_check_clocksource(struct clocksource *cs) +{ + u64 (*cread)(struct clocksource *cs); + u64 lat, mask, saved; + unsigned long long freq; + unsigned long flags; + unsigned i; + + if (cs->ipipe_read) { + mask = CLOCKSOURCE_MASK(64); + cread = cs->ipipe_read; + } else { + mask = cs->mask; + cread = cs->read; + + if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) == 0) + return; + + /* + * We only support masks such that cs->mask + 1 is a power of 2, + * 64 bits masks or masks lesser than 32 bits + */ + if (mask != CLOCKSOURCE_MASK(64) + && ((mask & (mask + 1)) != 0 || mask > 0xffffffff)) + return; + } + + /* + * We prefer a clocksource with a better resolution than 1us + */ + if (cs->shift <= 34) { + freq = 1000000000ULL << cs->shift; + do_div(freq, cs->mult); + } else { + freq = 1000000ULL << cs->shift; + do_div(freq, cs->mult); + freq *= 1000; + } + if (freq < 1000000) + return; + + /* Measure the clocksource latency */ + flags = hard_local_irq_save(); + saved = __ipipe_cs_last_tsc; + lat = cread(cs); + for (i = 0; i < 10; i++) + cread(cs); + lat = cread(cs) - lat; + __ipipe_cs_last_tsc = saved; + hard_local_irq_restore(flags); + lat = (lat * cs->mult) >> cs->shift; + do_div(lat, i + 1); + + if (!strcmp(cs->name, override_name)) + goto skip_tests; + + if (lat > __ipipe_cs_lat) + return; + + if (__ipipe_cs && !strcmp(__ipipe_cs->name, override_name)) + return; + + skip_tests: + flags = hard_local_irq_save(); + if (__ipipe_cs_last_tsc == 0) { + __ipipe_cs_lat = lat; + __ipipe_cs_freq = freq; + __ipipe_cs = cs; + __ipipe_cs_read = cread; + __ipipe_cs_mask = mask; + } + hard_local_irq_restore(flags); +} +#else /* !CONFIG_IPIPE_WANT_CLOCKSOURCE */ +#define ipipe_check_clocksource(cs) do { }while (0) +#endif /* !CONFIG_IPIPE_WANT_CLOCKSOURCE */ + /* * Enqueue the clocksource sorted by rating */ @@ -693,6 +799,8 @@ entry = &tmp->list; } list_add(&cs->list, entry); + + ipipe_check_clocksource(cs); } /** diff -ruN linux-org/kernel/time/timekeeping.c linux/kernel/time/timekeeping.c --- linux-org/kernel/time/timekeeping.c 2022-03-25 09:55:42.741450287 +0100 +++ linux/kernel/time/timekeeping.c 2022-03-25 10:15:23.365002965 +0100 @@ -525,7 +525,7 @@ xt = timespec64_to_timespec(tk_xtime(tk)); wm = timespec64_to_timespec(tk->wall_to_monotonic); update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult, - tk->tkr_mono.cycle_last); + tk->tkr_mono.shift, tk->tkr_mono.cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) diff -ruN linux-org/kernel/time/timer.c linux/kernel/time/timer.c --- linux-org/kernel/time/timer.c 2022-03-25 09:55:42.745450272 +0100 +++ linux/kernel/time/timer.c 2022-03-25 10:15:23.365002965 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1574,6 +1575,15 @@ } #endif +static inline void do_account_tick(struct task_struct *p, int user_tick) +{ +#ifdef CONFIG_IPIPE + if (!__ipipe_root_tick_p(raw_cpu_ptr(&ipipe_percpu.tick_regs))) + return; +#endif + account_process_tick(p, user_tick); +} + /* * Called from the timer interrupt handler to charge 
one tick to the current * process. user_tick is 1 if the tick is user time, 0 for system. @@ -1583,7 +1593,7 @@ struct task_struct *p = current; /* Note: this timer irq context must be accounted for as well. */ - account_process_tick(p, user_tick); + do_account_tick(p, user_tick); run_local_timers(); rcu_check_callbacks(user_tick); #ifdef CONFIG_IRQ_WORK diff -ruN linux-org/kernel/trace/ftrace.c linux/kernel/trace/ftrace.c --- linux-org/kernel/trace/ftrace.c 2022-03-25 09:55:42.745450272 +0100 +++ linux/kernel/trace/ftrace.c 2022-03-25 10:15:23.365002965 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -271,8 +272,17 @@ static void update_ftrace_function(void) { + struct ftrace_ops *ops; ftrace_func_t func; + for (ops = ftrace_ops_list; + ops != &ftrace_list_end; ops = ops->next) + if (ops->flags & FTRACE_OPS_FL_IPIPE_EXCLUSIVE) { + set_function_trace_op = ops; + func = ops->func; + goto set_pointers; + } + /* * Prepare the ftrace_ops that the arch callback will use. * If there's only one ftrace_ops registered, the ftrace_ops_list @@ -302,6 +312,7 @@ update_function_graph_func(); + set_pointers: /* If there's no change, then do nothing more here */ if (ftrace_trace_function == func) return; @@ -2689,6 +2700,9 @@ static void ftrace_run_update_code(int command) { +#ifdef CONFIG_IPIPE + unsigned long flags; +#endif /* CONFIG_IPIPE */ int ret; ret = ftrace_arch_code_modify_prepare(); @@ -2702,7 +2716,13 @@ * is safe. The stop_machine() is the safest, but also * produces the most overhead. */ +#ifdef CONFIG_IPIPE + flags = ipipe_critical_enter(NULL); + __ftrace_modify_code(&command); + ipipe_critical_exit(flags); +#else /* !CONFIG_IPIPE */ arch_ftrace_update_code(command); +#endif /* !CONFIG_IPIPE */ ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); @@ -5660,10 +5680,10 @@ * reason to cause large interrupt latencies while we do it. */ if (!mod) - local_irq_save(flags); + flags = hard_local_irq_save(); ftrace_update_code(mod, start_pg); if (!mod) - local_irq_restore(flags); + hard_local_irq_restore(flags); ret = 0; out: mutex_unlock(&ftrace_lock); @@ -5916,9 +5936,11 @@ unsigned long count, flags; int ret; - local_irq_save(flags); + flags = hard_local_irq_save_notrace(); ret = ftrace_dyn_arch_init(); - local_irq_restore(flags); + hard_local_irq_restore_notrace(flags); + + /* ftrace_dyn_arch_init places the return code in addr */ if (ret) goto failed; @@ -6074,7 +6096,16 @@ } } while_for_each_ftrace_op(op); out: - preempt_enable_notrace(); +#ifdef CONFIG_IPIPE + if (hard_irqs_disabled() || !__ipipe_root_p) + /* + * Nothing urgent to schedule here. At latest the timer tick + * will pick up whatever the tracing functions kicked off. 
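Illustrative aside (not part of the patch): ftrace_run_update_code() above replaces the usual stop_machine()-style synchronization with ipipe_critical_enter()/ipipe_critical_exit(), which park the other CPUs in a busy loop with hard IRQs masked while the code is rewritten. The same pattern outside ftrace would read as below (patch_my_trampoline() and its arguments are illustrative; real text patching also needs the arch-specific helpers):

static void patch_my_trampoline(void *text, const void *insn, size_t len)
{
	unsigned long flags;

	/* NULL: no per-CPU synchronization handler is required here. */
	flags = ipipe_critical_enter(NULL);
	memcpy(text, insn, len);
	flush_icache_range((unsigned long)text, (unsigned long)text + len);
	ipipe_critical_exit(flags);
}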
+ */ + preempt_enable_no_resched_notrace(); + else +#endif + preempt_enable_notrace(); trace_clear_recursion(bit); } diff -ruN linux-org/kernel/trace/Kconfig linux/kernel/trace/Kconfig --- linux-org/kernel/trace/Kconfig 2022-03-25 09:55:42.745450272 +0100 +++ linux/kernel/trace/Kconfig 2022-03-25 10:15:23.365002965 +0100 @@ -480,6 +480,7 @@ bool "enable/disable function tracing dynamically" depends on FUNCTION_TRACER depends on HAVE_DYNAMIC_FTRACE + depends on !IPIPE default y help This option will modify all the calls to function tracing diff -ruN linux-org/kernel/trace/ring_buffer.c linux/kernel/trace/ring_buffer.c --- linux-org/kernel/trace/ring_buffer.c 2022-03-25 09:55:42.749450257 +0100 +++ linux/kernel/trace/ring_buffer.c 2022-03-25 10:15:23.365002965 +0100 @@ -2582,7 +2582,8 @@ static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { - unsigned int val = cpu_buffer->current_context; + unsigned long flags; + unsigned int val; int bit; if (in_interrupt()) { @@ -2595,19 +2596,30 @@ } else bit = RB_CTX_NORMAL; - if (unlikely(val & (1 << bit))) + flags = hard_local_irq_save(); + + val = cpu_buffer->current_context; + if (unlikely(val & (1 << bit))) { + hard_local_irq_restore(flags); return 1; + } val |= (1 << bit); cpu_buffer->current_context = val; + hard_local_irq_restore(flags); + return 0; } static __always_inline void trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { + unsigned long flags; + + flags = hard_local_irq_save(); cpu_buffer->current_context &= cpu_buffer->current_context - 1; + hard_local_irq_restore(flags); } /** diff -ruN linux-org/kernel/trace/trace.c linux/kernel/trace/trace.c --- linux-org/kernel/trace/trace.c 2022-03-25 09:55:42.749450257 +0100 +++ linux/kernel/trace/trace.c 2022-03-25 10:15:23.365002965 +0100 @@ -2910,8 +2910,9 @@ /* Don't pollute graph traces with trace_vprintk internals */ pause_graph_tracing(); + flags = hard_local_irq_save(); + pc = preempt_count(); - preempt_disable_notrace(); tbuffer = get_trace_buf(); if (!tbuffer) { @@ -2924,7 +2925,6 @@ if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) goto out; - local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->trace_buffer.buffer; event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, @@ -2945,7 +2945,7 @@ put_trace_buf(); out_nobuffer: - preempt_enable_notrace(); + hard_local_irq_restore(flags); unpause_graph_tracing(); return len; diff -ruN linux-org/kernel/trace/trace_clock.c linux/kernel/trace/trace_clock.c --- linux-org/kernel/trace/trace_clock.c 2022-03-25 09:55:42.753450242 +0100 +++ linux/kernel/trace/trace_clock.c 2022-03-25 10:15:23.365002965 +0100 @@ -96,7 +96,7 @@ int this_cpu; u64 now; - local_irq_save(flags); + flags = hard_local_irq_save_notrace(); this_cpu = raw_smp_processor_id(); now = sched_clock_cpu(this_cpu); @@ -122,7 +122,7 @@ arch_spin_unlock(&trace_clock_struct.lock); out: - local_irq_restore(flags); + hard_local_irq_restore_notrace(flags); return now; } diff -ruN linux-org/kernel/trace/trace_functions.c linux/kernel/trace/trace_functions.c --- linux-org/kernel/trace/trace_functions.c 2022-03-25 09:55:42.757450226 +0100 +++ linux/kernel/trace/trace_functions.c 2022-03-25 10:15:23.369002950 +0100 @@ -172,7 +172,7 @@ * Need to use raw, since this must be called before the * recursive protection is performed. 
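Illustrative aside (not part of the patch): the tracing hunks above and below systematically trade local_irq_save() for hard_local_irq_save(). The former only stalls the root stage (a virtual interrupt mask), while the latter really masks interrupts at CPU level, which is what these callbacks need since they may also run over the head domain. In a nutshell:

static void touch_per_cpu_state(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* virtual: head-domain IRQs still fire */
	/* ... touch root-stage-only data ... */
	local_irq_restore(flags);

	flags = hard_local_irq_save();	/* real CPU mask: no stage can preempt */
	/* ... touch data shared with head-domain code ... */
	hard_local_irq_restore(flags);
}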
*/ - local_irq_save(flags); + flags = hard_local_irq_save(); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->trace_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); @@ -192,7 +192,7 @@ } atomic_dec(&data->disabled); - local_irq_restore(flags); + hard_local_irq_restore(flags); } static struct tracer_opt func_opts[] = { diff -ruN linux-org/kernel/trace/trace_functions_graph.c linux/kernel/trace/trace_functions_graph.c --- linux-org/kernel/trace/trace_functions_graph.c 2022-03-25 09:55:42.757450226 +0100 +++ linux/kernel/trace/trace_functions_graph.c 2022-03-25 10:15:23.369002950 +0100 @@ -408,7 +408,7 @@ if (tracing_thresh) return 1; - local_irq_save(flags); + flags = hard_local_irq_save_notrace(); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->trace_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); @@ -420,7 +420,7 @@ } atomic_dec(&data->disabled); - local_irq_restore(flags); + hard_local_irq_restore_notrace(flags); return ret; } @@ -482,7 +482,7 @@ int cpu; int pc; - local_irq_save(flags); + flags = hard_local_irq_save_notrace(); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->trace_buffer.data, cpu); disabled = atomic_inc_return(&data->disabled); @@ -491,7 +491,7 @@ __trace_graph_return(tr, trace, flags, pc); } atomic_dec(&data->disabled); - local_irq_restore(flags); + hard_local_irq_restore_notrace(flags); } void set_graph_array(struct trace_array *tr) diff -ruN linux-org/kernel/trace/trace_irqsoff.c linux/kernel/trace/trace_irqsoff.c --- linux-org/kernel/trace/trace_irqsoff.c 2022-03-25 09:55:42.757450226 +0100 +++ linux/kernel/trace/trace_irqsoff.c 2022-03-25 10:15:23.369002950 +0100 @@ -483,28 +483,28 @@ */ void trace_hardirqs_on(void) { - if (!preempt_trace() && irq_trace()) + if (ipipe_root_p && !preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_on); void trace_hardirqs_off(void) { - if (!preempt_trace() && irq_trace()) + if (ipipe_root_p && !preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_off); __visible void trace_hardirqs_on_caller(unsigned long caller_addr) { - if (!preempt_trace() && irq_trace()) + if (ipipe_root_p && !preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_on_caller); __visible void trace_hardirqs_off_caller(unsigned long caller_addr) { - if (!preempt_trace() && irq_trace()) + if (ipipe_root_p && !preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_off_caller); diff -ruN linux-org/lib/atomic64.c linux/lib/atomic64.c --- linux-org/lib/atomic64.c 2022-03-25 09:55:42.765450196 +0100 +++ linux/lib/atomic64.c 2022-03-25 10:15:23.369002950 +0100 @@ -29,15 +29,15 @@ * Ensure each lock is in a separate cacheline. */ static union { - raw_spinlock_t lock; + ipipe_spinlock_t lock; char pad[L1_CACHE_BYTES]; } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { [0 ... 
(NR_LOCKS - 1)] = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), + .lock = IPIPE_SPIN_LOCK_UNLOCKED, }, }; -static inline raw_spinlock_t *lock_addr(const atomic64_t *v) +static inline ipipe_spinlock_t *lock_addr(const atomic64_t *v) { unsigned long addr = (unsigned long) v; @@ -49,7 +49,7 @@ long long atomic64_read(const atomic64_t *v) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + ipipe_spinlock_t *lock = lock_addr(v); long long val; raw_spin_lock_irqsave(lock, flags); @@ -62,7 +62,7 @@ void atomic64_set(atomic64_t *v, long long i) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + ipipe_spinlock_t *lock = lock_addr(v); raw_spin_lock_irqsave(lock, flags); v->counter = i; @@ -74,7 +74,7 @@ void atomic64_##op(long long a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + ipipe_spinlock_t *lock = lock_addr(v); \ \ raw_spin_lock_irqsave(lock, flags); \ v->counter c_op a; \ @@ -86,7 +86,7 @@ long long atomic64_##op##_return(long long a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + ipipe_spinlock_t *lock = lock_addr(v); \ long long val; \ \ raw_spin_lock_irqsave(lock, flags); \ @@ -100,7 +100,7 @@ long long atomic64_fetch_##op(long long a, atomic64_t *v) \ { \ unsigned long flags; \ - raw_spinlock_t *lock = lock_addr(v); \ + ipipe_spinlock_t *lock = lock_addr(v); \ long long val; \ \ raw_spin_lock_irqsave(lock, flags); \ @@ -137,7 +137,7 @@ long long atomic64_dec_if_positive(atomic64_t *v) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + ipipe_spinlock_t *lock = lock_addr(v); long long val; raw_spin_lock_irqsave(lock, flags); @@ -152,7 +152,7 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + ipipe_spinlock_t *lock = lock_addr(v); long long val; raw_spin_lock_irqsave(lock, flags); @@ -167,7 +167,7 @@ long long atomic64_xchg(atomic64_t *v, long long new) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + ipipe_spinlock_t *lock = lock_addr(v); long long val; raw_spin_lock_irqsave(lock, flags); @@ -181,7 +181,7 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; - raw_spinlock_t *lock = lock_addr(v); + ipipe_spinlock_t *lock = lock_addr(v); int ret = 0; raw_spin_lock_irqsave(lock, flags); diff -ruN linux-org/lib/bust_spinlocks.c linux/lib/bust_spinlocks.c --- linux-org/lib/bust_spinlocks.c 2022-03-25 09:55:42.765450196 +0100 +++ linux/lib/bust_spinlocks.c 2022-03-25 10:15:23.369002950 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include void __attribute__((weak)) bust_spinlocks(int yes) @@ -26,6 +27,7 @@ unblank_screen(); #endif console_unblank(); + ipipe_trace_panic_dump(); if (--oops_in_progress == 0) wake_up_klogd(); } diff -ruN linux-org/lib/dump_stack.c linux/lib/dump_stack.c --- linux-org/lib/dump_stack.c 2022-03-25 09:55:42.769450182 +0100 +++ linux/lib/dump_stack.c 2022-03-25 10:15:23.369002950 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include static void __dump_stack(void) { @@ -25,6 +26,29 @@ #ifdef CONFIG_SMP static atomic_t dump_lock = ATOMIC_INIT(-1); +static unsigned long disable_local_irqs(void) +{ + unsigned long flags = 0; /* only to trick the UMR detection */ + + /* + * We neither need nor want to disable root stage IRQs over + * the head stage, where CPU migration can't + * happen. 
Conversely, we neither need nor want to disable + * hard IRQs from the head stage, so that latency won't + * skyrocket as a result of dumping the stack backtrace. + */ + if (ipipe_root_p) + local_irq_save(flags); + + return flags; +} + +static void restore_local_irqs(unsigned long flags) +{ + if (ipipe_root_p) + local_irq_restore(flags); +} + asmlinkage __visible void dump_stack(void) { unsigned long flags; @@ -37,7 +61,7 @@ * against other CPUs */ retry: - local_irq_save(flags); + flags = disable_local_irqs(); cpu = smp_processor_id(); old = atomic_cmpxchg(&dump_lock, -1, cpu); if (old == -1) { @@ -45,7 +69,7 @@ } else if (old == cpu) { was_locked = 1; } else { - local_irq_restore(flags); + restore_local_irqs(flags); cpu_relax(); goto retry; } @@ -55,7 +79,7 @@ if (!was_locked) atomic_set(&dump_lock, -1); - local_irq_restore(flags); + restore_local_irqs(flags); } #else asmlinkage __visible void dump_stack(void) diff -ruN linux-org/lib/ioremap.c linux/lib/ioremap.c --- linux-org/lib/ioremap.c 2022-03-25 09:55:42.773450166 +0100 +++ linux/lib/ioremap.c 2022-03-25 10:15:23.369002950 +0100 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -177,7 +178,12 @@ break; } while (pgd++, addr = next, addr != end); - flush_cache_vmap(start, end); + /* APEI may invoke this for temporarily remapping pages in interrupt + * context - nothing we can and need to propagate globally. */ + if (!in_interrupt()) { + __ipipe_pin_mapping_globally(start, end); + flush_cache_vmap(start, end); + } return err; } diff -ruN linux-org/lib/Kconfig.debug linux/lib/Kconfig.debug --- linux-org/lib/Kconfig.debug 2022-03-25 09:55:42.765450196 +0100 +++ linux/lib/Kconfig.debug 2022-03-25 10:15:23.369002950 +0100 @@ -412,6 +412,7 @@ keys are documented in . Don't say Y unless you really know what this hack does. + config MAGIC_SYSRQ_DEFAULT_ENABLE hex "Enable magic SysRq key functions by default" depends on MAGIC_SYSRQ @@ -431,6 +432,8 @@ This option allows you to decide whether you want to enable the magic SysRq key. 
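Illustrative aside (not part of the patch): lib/atomic64.c above switches its hashed locks to ipipe_spinlock_t so that the raw_spin_lock_irqsave() calls hard-disable interrupts and the emulated 64-bit atomics remain usable from head-domain code; printk.c uses the same IPIPE_DEFINE_SPINLOCK() idiom. A driver-private lock shared between both domains would follow the same pattern:

static IPIPE_DEFINE_SPINLOCK(my_shared_lock);
static u64 my_shared_counter;

static void my_shared_counter_add(u64 n)
{
	unsigned long flags;

	/* On an ipipe_spinlock_t this masks IRQs in the CPU, so the
	 * critical section cannot be preempted by the head domain. */
	raw_spin_lock_irqsave(&my_shared_lock, flags);
	my_shared_counter += n;
	raw_spin_unlock_irqrestore(&my_shared_lock, flags);
}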
+source "kernel/ipipe/Kconfig.debug" + config DEBUG_KERNEL bool "Kernel debugging" help diff -ruN linux-org/lib/smp_processor_id.c linux/lib/smp_processor_id.c --- linux-org/lib/smp_processor_id.c 2022-03-25 09:55:42.785450121 +0100 +++ linux/lib/smp_processor_id.c 2022-03-25 10:15:23.369002950 +0100 @@ -7,12 +7,19 @@ #include #include #include +#include notrace static unsigned int check_preemption_disabled(const char *what1, const char *what2) { int this_cpu = raw_smp_processor_id(); + if (hard_irqs_disabled()) + goto out; + + if (!ipipe_root_p) + goto out; + if (likely(preempt_count())) goto out; diff -ruN linux-org/Makefile linux/Makefile --- linux-org/Makefile 2022-03-25 09:55:36.041475525 +0100 +++ linux/Makefile 2022-03-25 10:19:24.996092782 +0100 @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 37 -EXTRAVERSION = +EXTRAVERSION = -xenomai NAME = Petit Gorille # *DOCUMENTATION* diff -ruN linux-org/mm/memory.c linux/mm/memory.c --- linux-org/mm/memory.c 2022-03-25 09:55:42.905449669 +0100 +++ linux/mm/memory.c 2022-03-25 10:15:23.369002950 +0100 @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,11 @@ unsigned long highest_memmap_pfn __read_mostly; +static inline void cow_user_page(struct page *dst, + struct page *src, + unsigned long va, + struct vm_area_struct *vma); + /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() */ @@ -939,8 +945,8 @@ static inline unsigned long copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, - unsigned long addr, int *rss) + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, + unsigned long addr, int *rss, struct page *uncow_page) { unsigned long vm_flags = vma->vm_flags; pte_t pte = *src_pte; @@ -1018,6 +1024,21 @@ * in the parent and the child */ if (is_cow_mapping(vm_flags)) { +#ifdef CONFIG_IPIPE + if (uncow_page) { + struct page *old_page = vm_normal_page(vma, addr, pte); + cow_user_page(uncow_page, old_page, addr, vma); + pte = mk_pte(uncow_page, vma->vm_page_prot); + + if (vm_flags & VM_SHARED) + pte = pte_mkclean(pte); + pte = pte_mkold(pte); + + page_add_new_anon_rmap(uncow_page, vma, addr, false); + rss[!!PageAnon(uncow_page)]++; + goto out_set_pte; + } +#endif /* CONFIG_IPIPE */ ptep_set_wrprotect(src_mm, addr, src_pte); pte = pte_wrprotect(pte); } @@ -1065,13 +1086,27 @@ int progress = 0; int rss[NR_MM_COUNTERS]; swp_entry_t entry = (swp_entry_t){0}; - + struct page *uncow_page = NULL; +#ifdef CONFIG_IPIPE + int do_cow_break = 0; +again: + if (do_cow_break) { + uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + if (uncow_page == NULL) + return -ENOMEM; + do_cow_break = 0; + } +#else again: +#endif init_rss_vec(rss); dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); - if (!dst_pte) + if (!dst_pte) { + if (uncow_page) + put_page(uncow_page); return -ENOMEM; + } src_pte = pte_offset_map(src_pmd, addr); src_ptl = pte_lockptr(src_mm, src_pmd); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); @@ -1094,8 +1129,25 @@ progress++; continue; } +#ifdef CONFIG_IPIPE + if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) { + if (is_cow_mapping(vma->vm_flags) && + test_bit(MMF_VM_PINNED, &src_mm->flags) && + ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) { + arch_leave_lazy_mmu_mode(); + spin_unlock(src_ptl); + pte_unmap(src_pte); + add_mm_rss_vec(dst_mm, rss); + pte_unmap_unlock(dst_pte, dst_ptl); + cond_resched(); + do_cow_break = 1; + goto again; + } + } +#endif entry.val 
= copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, - vma, addr, rss); + vma, addr, rss, uncow_page); + uncow_page = NULL; if (entry.val) break; progress += 8; @@ -4642,6 +4694,41 @@ } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ +#ifdef CONFIG_IPIPE + +int __ipipe_disable_ondemand_mappings(struct task_struct *tsk) +{ + struct vm_area_struct *vma; + struct mm_struct *mm; + int result = 0; + + mm = get_task_mm(tsk); + if (!mm) + return -EPERM; + + down_write(&mm->mmap_sem); + if (test_bit(MMF_VM_PINNED, &mm->flags)) + goto done_mm; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (is_cow_mapping(vma->vm_flags) && + (vma->vm_flags & VM_WRITE)) { + result = __ipipe_pin_vma(mm, vma); + if (result < 0) + goto done_mm; + } + } + set_bit(MMF_VM_PINNED, &mm->flags); + + done_mm: + up_write(&mm->mmap_sem); + mmput(mm); + return result; +} +EXPORT_SYMBOL_GPL(__ipipe_disable_ondemand_mappings); + +#endif /* CONFIG_IPIPE */ + #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS static struct kmem_cache *page_ptl_cachep; diff -ruN linux-org/mm/mlock.c linux/mm/mlock.c --- linux-org/mm/mlock.c 2022-03-25 09:55:42.933449564 +0100 +++ linux/mm/mlock.c 2022-03-25 10:15:23.369002950 +0100 @@ -864,3 +864,27 @@ spin_unlock(&shmlock_user_lock); free_uid(user); } + +#ifdef CONFIG_IPIPE +int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma) +{ + int ret, write, len; + + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) + return 0; + + if (!((vma->vm_flags & VM_DONTEXPAND) || + is_vm_hugetlb_page(vma) || vma == get_gate_vma(mm))) { + ret = populate_vma_page_range(vma, vma->vm_start, vma->vm_end, + NULL); + return ret < 0 ? ret : 0; + } + + write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE; + len = DIV_ROUND_UP(vma->vm_end, PAGE_SIZE) - vma->vm_start/PAGE_SIZE; + ret = get_user_pages(vma->vm_start, len, write, 0, NULL); + if (ret < 0) + return ret; + return ret == len ? 
0 : -EFAULT; +} +#endif diff -ruN linux-org/mm/mmu_context.c linux/mm/mmu_context.c --- linux-org/mm/mmu_context.c 2022-03-25 09:55:42.937449549 +0100 +++ linux/mm/mmu_context.c 2022-03-25 10:15:23.369002950 +0100 @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -23,15 +24,18 @@ { struct mm_struct *active_mm; struct task_struct *tsk = current; + unsigned long flags; task_lock(tsk); active_mm = tsk->active_mm; + ipipe_mm_switch_protect(flags); if (active_mm != mm) { mmgrab(mm); tsk->active_mm = mm; } tsk->mm = mm; switch_mm(active_mm, mm, tsk); + ipipe_mm_switch_unprotect(flags); task_unlock(tsk); #ifdef finish_arch_post_lock_switch finish_arch_post_lock_switch(); diff -ruN linux-org/mm/mprotect.c linux/mm/mprotect.c --- linux-org/mm/mprotect.c 2022-03-25 09:55:42.937449549 +0100 +++ linux/mm/mprotect.c 2022-03-25 10:15:23.369002950 +0100 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,7 @@ struct mm_struct *mm = vma->vm_mm; pte_t *pte, oldpte; spinlock_t *ptl; - unsigned long pages = 0; + unsigned long pages = 0, flags; int target_node = NUMA_NO_NODE; /* @@ -96,6 +97,7 @@ continue; } + flags = hard_local_irq_save(); ptent = ptep_modify_prot_start(mm, addr, pte); ptent = pte_modify(ptent, newprot); if (preserve_write) @@ -108,6 +110,7 @@ ptent = pte_mkwrite(ptent); } ptep_modify_prot_commit(mm, addr, pte, ptent); + hard_local_irq_restore(flags); pages++; } else if (IS_ENABLED(CONFIG_MIGRATION)) { swp_entry_t entry = pte_to_swp_entry(oldpte); @@ -288,6 +291,12 @@ pages = hugetlb_change_protection(vma, start, end, newprot); else pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); +#ifdef CONFIG_IPIPE + if (test_bit(MMF_VM_PINNED, &vma->vm_mm->flags) && + ((vma->vm_flags | vma->vm_mm->def_flags) & VM_LOCKED) && + (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + __ipipe_pin_vma(vma->vm_mm, vma); +#endif return pages; } diff -ruN linux-org/mm/vmalloc.c linux/mm/vmalloc.c --- linux-org/mm/vmalloc.c 2022-03-25 09:55:42.957449474 +0100 +++ linux/mm/vmalloc.c 2022-03-25 10:15:23.369002950 +0100 @@ -232,6 +232,8 @@ return err; } while (pgd++, addr = next, addr != end); + __ipipe_pin_mapping_globally(start, end); + return nr; }
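Illustrative aside (not part of the patch): the mm changes above exist so that a co-kernel can pin a process's writable private mappings up front, guaranteeing that no minor fault occurs once its threads run over the head domain; the fork-time un-COW in copy_pte_range() then keeps those pages writable in the parent. A caller would typically invoke the new helper when a process turns real-time (only __ipipe_disable_ondemand_mappings() comes from the patch):

static int make_current_rt_capable(void)
{
	int ret;

	/* Fault in and pin every COW-able, writable mapping of the caller,
	 * then mark the mm MMF_VM_PINNED. */
	ret = __ipipe_disable_ondemand_mappings(current);
	if (ret)
		return ret;

	/* From here on, the task can run memory accesses from the head
	 * domain without triggering demand paging. */
	return 0;
}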