i386-specific code.

This is the main chunk of the i386-specific code. Various fixes have not yet been merged into this part; the following ones, however, have been:

* SKAS: rename modify_ldt to avoid conflicts

  SKAS adds a function modify_ldt (closely related to sys_modify_ldt), but its declaration conflicts with some external code (ATI kernel modules). Since the name is not important, the function is renamed to __modify_ldt.

* Description of the SMP hack in include/asm-i386/mmu_context.h

  From: Jeff Dike and me, Paolo Giarrusso

  The removed panic (the BUG_ON in switch_mm()) can be triggered when all of the following happen:

  1) a UML thread sleeps;
  2) a kernel thread is scheduled on the same CPU and lazy-TLB switching is used;
  3) on any other CPU, PTRACE_SWITCH_MM is called on the sleeping thread;
  4) the sleeping thread is rescheduled onto the same CPU. The same task is coming back, BUT a context switch is still needed, since that task has changed its ->mm.

  By the way, the combination of 1), 3) and 4) is likely to occur (I think it happens on every context switch inside the guest); 2) is the part that does not always happen (although when the CPU goes to sleep it runs the idle thread with lazy-TLB, so in that case 2) does happen).

Signed-off-by: Paolo 'Blaisorblade' Giarrusso Index: linux-2.6.git/arch/i386/Kconfig =================================================================== --- linux-2.6.git.orig/arch/i386/Kconfig +++ linux-2.6.git/arch/i386/Kconfig @@ -496,6 +496,10 @@ config X86_PAE depends on HIGHMEM64G default y +config PROC_MM + bool "/proc/mm support" + default y + # Common NUMA Features config NUMA bool "Numa Memory Allocation and Scheduler Support" Index: linux-2.6.git/arch/i386/kernel/ldt.c =================================================================== --- linux-2.6.git.orig/arch/i386/kernel/ldt.c +++ linux-2.6.git/arch/i386/kernel/ldt.c @@ -55,7 +55,7 @@ static int alloc_ldt(mm_context_t *pc, i pc->size = mincount; wmb(); - if (reload) { + if (reload && (&current->active_mm->context == pc)) { #ifdef CONFIG_SMP cpumask_t mask; preempt_disable(); @@ -90,14 +90,12 @@ static inline int copy_ldt(mm_context_t * we do not have to muck with descriptors here, that is * done in switch_mm() as needed. */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +int __init_new_context(struct mm_struct *mm, struct mm_struct *old_mm) { - struct mm_struct * old_mm; int retval = 0; init_MUTEX(&mm->context.sem); mm->context.size = 0; - old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); retval = copy_ldt(&mm->context, &old_mm->context); @@ -106,6 +104,11 @@ int init_new_context(struct task_struct return retval; } +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + return __init_new_context(mm, current->mm); +} + /* * No need to lock the MM as we are the last user */ @@ -122,11 +125,11 @@ void destroy_context(struct mm_struct *m } } -static int read_ldt(void __user * ptr, unsigned long bytecount) +static int read_ldt(struct mm_struct * mm, void __user * ptr, + unsigned long bytecount) { int err; unsigned long size; - struct mm_struct * mm = current->mm; if (!mm->context.size) return 0; @@ -175,9 +178,8 @@ static int 
read_default_ldt(void __user return err; } -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) +static int write_ldt(struct mm_struct * mm, void __user * ptr, unsigned long bytecount, int oldmode) { - struct mm_struct * mm = current->mm; __u32 entry_1, entry_2; int error; struct user_desc ldt_info; @@ -201,7 +203,7 @@ static int write_ldt(void __user * ptr, down(&mm->context.sem); if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); + error = alloc_ldt(&mm->context, ldt_info.entry_number+1, 1); if (error < 0) goto out_unlock; } @@ -231,23 +233,29 @@ out: return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr, + unsigned long bytecount) { int ret = -ENOSYS; switch (func) { case 0: - ret = read_ldt(ptr, bytecount); + ret = read_ldt(mm, ptr, bytecount); break; case 1: - ret = write_ldt(ptr, bytecount, 1); + ret = write_ldt(mm, ptr, bytecount, 1); break; case 2: ret = read_default_ldt(ptr, bytecount); break; case 0x11: - ret = write_ldt(ptr, bytecount, 0); + ret = write_ldt(mm, ptr, bytecount, 0); break; } return ret; } + +asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +{ + return __modify_ldt(current->mm, func, ptr, bytecount); +} Index: linux-2.6.git/arch/i386/kernel/ptrace.c =================================================================== --- linux-2.6.git.orig/arch/i386/kernel/ptrace.c +++ linux-2.6.git/arch/i386/kernel/ptrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -625,6 +626,56 @@ long arch_ptrace(struct task_struct *chi (struct user_desc __user *) data); break; +#ifdef CONFIG_PROC_MM + case PTRACE_FAULTINFO: { + struct ptrace_faultinfo fault; + + fault = ((struct ptrace_faultinfo) + { .is_write = child->thread.error_code, + .addr = child->thread.cr2 }); + ret = copy_to_user((unsigned 
long *) data, &fault, + sizeof(fault)); + if(ret) + break; + break; + } + + case PTRACE_SIGPENDING: + ret = copy_to_user((unsigned long *) data, + &child->pending.signal, + sizeof(child->pending.signal)); + break; + + case PTRACE_LDT: { + struct ptrace_ldt ldt; + + if(copy_from_user(&ldt, (unsigned long *) data, + sizeof(ldt))){ + ret = -EIO; + break; + } + ret = __modify_ldt(child->mm, ldt.func, ldt.ptr, ldt.bytecount); + break; + } + + case PTRACE_SWITCH_MM: { + struct mm_struct *old = child->mm; + struct mm_struct *new = proc_mm_get_mm(data); + + if(IS_ERR(new)){ + ret = PTR_ERR(new); + break; + } + + atomic_inc(&new->mm_users); + child->mm = new; + child->active_mm = new; + mmput(old); + ret = 0; + break; + } +#endif + default: ret = ptrace_request(child, request, addr, data); break; Index: linux-2.6.git/arch/i386/kernel/sys_i386.c =================================================================== --- linux-2.6.git.orig/arch/i386/kernel/sys_i386.c +++ linux-2.6.git/arch/i386/kernel/sys_i386.c @@ -41,7 +41,7 @@ asmlinkage int sys_pipe(unsigned long __ } /* common code for old and new mmaps */ -static inline long do_mmap2( +long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) @@ -56,9 +56,9 @@ static inline long do_mmap2( goto out; } - down_write(&current->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); + down_write(&mm->mmap_sem); + error = __do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff); + up_write(&mm->mmap_sem); if (file) fput(file); @@ -70,7 +70,7 @@ asmlinkage long sys_mmap2(unsigned long unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { - return do_mmap2(addr, len, prot, flags, fd, pgoff); + return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); } /* @@ -101,7 +101,7 @@ asmlinkage int old_mmap(struct mmap_arg_ if (a.offset & ~PAGE_MASK) goto out; - err 
= do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + err = do_mmap2(current->mm, a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); out: return err; } Index: linux-2.6.git/include/asm-i386/desc.h =================================================================== --- linux-2.6.git.orig/include/asm-i386/desc.h +++ linux-2.6.git/include/asm-i386/desc.h @@ -162,6 +162,9 @@ static inline unsigned long get_desc_bas return base; } +extern int __modify_ldt(struct mm_struct * mm, int func, void __user *ptr, + unsigned long bytecount); + #endif /* !__ASSEMBLY__ */ #endif Index: linux-2.6.git/include/asm-i386/processor.h =================================================================== --- linux-2.6.git.orig/include/asm-i386/processor.h +++ linux-2.6.git/include/asm-i386/processor.h @@ -720,6 +720,8 @@ static inline void prefetchw(const void extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern int __init_new_context(struct mm_struct *mm, struct mm_struct *old_mm); + #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern unsigned long boot_option_idle_override; Index: linux-2.6.git/include/asm-i386/ptrace.h =================================================================== --- linux-2.6.git.orig/include/asm-i386/ptrace.h +++ linux-2.6.git/include/asm-i386/ptrace.h @@ -87,4 +87,26 @@ extern unsigned long profile_pc(struct p #endif #endif /* __KERNEL__ */ +/*For SKAS3 support.*/ +#ifndef _LINUX_PTRACE_STRUCT_DEF +#define _LINUX_PTRACE_STRUCT_DEF + +#define PTRACE_FAULTINFO 52 +#define PTRACE_SIGPENDING 53 +#define PTRACE_LDT 54 +#define PTRACE_SWITCH_MM 55 + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#endif /*ifndef _LINUX_PTRACE_STRUCT_DEF*/ + #endif Index: linux-2.6.git/include/asm-i386/mmu_context.h =================================================================== --- 
linux-2.6.git.orig/include/asm-i386/mmu_context.h +++ linux-2.6.git/include/asm-i386/mmu_context.h @@ -29,6 +29,10 @@ static inline void switch_mm(struct mm_s { int cpu = smp_processor_id(); +#ifdef CONFIG_SMP + prev = per_cpu(cpu_tlbstate, cpu).active_mm; +#endif + if (likely(prev != next)) { /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); @@ -50,7 +54,6 @@ static inline void switch_mm(struct mm_s #ifdef CONFIG_SMP else { per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; - BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled