/*
 * Copyright (C) Ingo Molnar, 2002
 * Copyright (C) Paolo 'Blaisorblade' Giarrusso, 2005
 *
 * Licensed by Paolo Giarrusso under the GPL (no license provided by Ingo
 * Molnar, assuming GPL)
 */

/*************************************************************
 * Configuration section START
 * Use numerical defines, we test them with #if, not #ifdef.
 */

//Override config to get all faults if set.
#define CONFIG_OVERRIDE_REQUIRE_FAULTS 0

//With this set, this program will map pages linearly.
#define CONFIG_LINEAR 0

//If permissions work correctly, or you want to test that, set to one.
//It will use PROT_NONE for the initial mapping.
#define CONFIG_STRICT_PERM_TEST 0

//Remap another page on the first VMA with remap_file_pages, to make sure the
//VMA has VM_NONUNIFORM set, to test the "VM_NONUNIFORM and not remapped page"
//code path.
#define CONFIG_TEST_DEFAULT_NONUNIFORM 1

//With this set, we should get faults on writes.
#define CONFIG_WANT_WRITE_FAULTS 1
//With this set, we should get faults on reads!
#define CONFIG_WANT_READ_FAULTS 0

//Will depend on PROT_WRITE implying PROT_READ
//Actually, for missing PTE's, the normal fault handler will refuse to satisfy
//such a fault, unless you set CONFIG_WRITE_BEFORE_READ to 1, or you set
//CONFIG_TEST_DEFAULT_NONUNIFORM to 1 (because a different handling path is
//used).
#define CONFIG_TEST_WRITE_IMPLIES_READ 1

//Will make sure that we do a WRITE fault before a read one on normal paths.
#define CONFIG_WRITE_BEFORE_READ 1

//Will depend on PROT_EXEC implying PROT_READ. Useless with
//CONFIG_TEST_WRITE_IMPLIES_READ, since that will avoid both PROT_READ and
//PROT_EXEC.
#define CONFIG_TEST_EXEC_IMPLIES_READ 0

//Want to force myself out of memory, to test the fault-in paths?
#define CONFIG_SWAP_OUT 1
//Use /proc/sys/vm/drop_caches to sync.
#define CONFIG_PROCFS_SWAP 0

//Use disk, not shmfs for the file to map from. Broken.
#define CONFIG_TMP_ON_DISK 0
#define CONFIG_REUSE_FILE 0
#define CONFIG_POPULATE 1
#define CONFIG_NONBLOCK 1

//Some additional tests.

//Debug wrong complete truncation of PTEs, when CONFIG_LINEAR == 1.
#define CONFIG_MADVISE_DONTNEED 1
#define CONFIG_TRUNCATE_TEST 0

//Debug wrong truncation of private COW pages; this will also spit out a kernel
//warning. For 2.6.16 the patch to support this is not yet ported.
#define CONFIG_DEBUG_PRIVATE 0

//Verbosity level for messages.
#define CONFIG_VERBOSE 1

//The remap test runs once for every file page offset from
//CONFIG_BASE_OFFSET + CONFIG_ITER_NUM down to CONFIG_BASE_OFFSET.
#define CONFIG_BASE_OFFSET 550
#define CONFIG_ITER_NUM 30

#define CONFIG_MEMORY 28

//Number of pages to touch to fill memory. Set to mem_size / 4096
//#define CONFIG_SWAP_NPAGES (80 * 1024)
#if 0
#define CONFIG_SWAP_NPAGES (5 * 1024)
#else
#define CONFIG_SWAP_NPAGES (CONFIG_MEMORY * 1024 / 4)
#endif

#if CONFIG_OVERRIDE_REQUIRE_FAULTS
# undef CONFIG_WANT_WRITE_FAULTS
# undef CONFIG_WANT_READ_FAULTS
# define CONFIG_WANT_WRITE_FAULTS 1
# define CONFIG_WANT_READ_FAULTS 1
# undef CONFIG_OVERRIDE_REQUIRE_FAULTS
#endif
/*
 * Configuration section END
 ************************************************************/

//Don't loop more than once with the same offset? No, repeat the loop anyway.
#if 0
#if CONFIG_LINEAR
#undef CONFIG_ITER_NUM
#define CONFIG_ITER_NUM 0
#endif
#endif

//Feature-test macros: they must be defined before the first system header.
//To get the posix_fadvise definition.
#define _XOPEN_SOURCE 600
//To get the strsignal definition.
#define _GNU_SOURCE

//This macro is for viewing preprocessed source.
//NOTE(review): the header names were missing from the #include directives in
//this copy of the file; the list below is reconstructed from the library
//symbols the program uses - confirm against the original.
#ifndef EXCLUDE_HEADERS
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ucontext.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#endif

#define NONBLOCK_FLAGS (CONFIG_NONBLOCK ? MAP_NONBLOCK : 0)

//Protection bit that stands for "readable" in the tests below.
#if CONFIG_TEST_EXEC_IMPLIES_READ
#define PROT_WRAPREAD PROT_EXEC
#else
#define PROT_WRAPREAD PROT_READ
#endif

//Initial mmap protection.
#if CONFIG_STRICT_PERM_TEST #define PROT (PROT_NONE) #else #define PROT (PROT_WRAPREAD|PROT_WRITE) #endif #if !CONFIG_WANT_WRITE_FAULTS && !CONFIG_WANT_READ_FAULTS // && CONFIG_STRICT_PERM_TEST #if CONFIG_TEST_WRITE_IMPLIES_READ //For the single page map. # define PROT2 (PROT_WRITE) //For all the other remaps. # define PROT3 (PROT_WRITE) #else # define PROT2 (PROT_WRAPREAD|PROT_WRITE) # define PROT3 (PROT_WRAPREAD|PROT_WRITE) #endif #elif !CONFIG_WANT_READ_FAULTS # define PROT2 (PROT_WRAPREAD) # define PROT3 (PROT_WRAPREAD) #else # define PROT2 (PROT_NONE) # define PROT3 (PROT_NONE) #endif #define SWAP_SIZE (CONFIG_SWAP_NPAGES * PAGE_SIZE) #if 0 #ifdef __s390__ #define __NR_sys_remap_file_pages 265 #elif __ia64__ #define __NR_sys_remap_file_pages 1259 #elif __i386__ #define __NR_sys_remap_file_pages 280 #else #error "define __NR_sys_remap_file_pages for your architecture" #endif #endif /*_syscall5(int, remap_file_pages, unsigned long, start, unsigned long, len, unsigned long, prot, unsigned long, pgoff, int, flags);*/ /* Wrapper for the new fremap API. 
*/ #define MAP_CHGPROT 0x20000 #define sys_remap_file_pages(a,b,prot,d,flags) \ remap_file_pages((void*)a,b,prot,d,(flags) | MAP_CHGPROT) #define PAGE_SIZE 4096 #define PAGE_WORDS ((signed) (PAGE_SIZE/sizeof(int))) #define CACHE_PAGES 1024 #define CACHE_SIZE (CACHE_PAGES*PAGE_SIZE) #define WINDOW_PAGES (32) #define WINDOW_SIZE (WINDOW_PAGES*PAGE_SIZE) char * buf; int fd; char *ptr; void try_swap_out(void) { posix_fadvise(fd, 0, CACHE_SIZE, POSIX_FADV_DONTNEED); madvise(ptr, WINDOW_SIZE, MADV_DONTNEED); #if CONFIG_PROCFS_SWAP system("sync; echo 1 > /proc/sys/vm/drop_caches"); #else char t = 1; int i; #if CONFIG_SWAP_OUT printf("Swapping myself out\n"); #endif fflush(stdout); for (i = 0; i < 5; i++) { for (i = 0; i < CONFIG_SWAP_NPAGES; i++) { #if CONFIG_SWAP_OUT buf[PAGE_SIZE * i] = t; #else t = buf[PAGE_SIZE * i]; #endif } } #endif } char filename[4096]; void unlink_file(void) { #if !CONFIG_REUSE_FILE unlink(filename); #endif } int global_ret = 0; void unlink_file_sig(int sig) { fprintf(stderr, "There have been %d errors with SIGSEGVs in excess or missing.\n", global_ret); unlink_file(); //Emulate standard exit. Yes, we *must* explicitly exit, since signal //handlers replace normal handling. //This one does not cause exit at this signal, only at next one. //signal(sig, SIG_DFL); _exit(128 + sig); } int segv_happened = 0; int segv_expected = -1; //volatile int *write_ptr; int dummy; #if __x86_64__ #define AX "rax" #else #define AX "eax" #endif void try_set(unsigned int * ptr, int data) { //Like *ptr = data //asm("mov %1, (%0)": : "r"(ptr), "r"(data)); //We must use the below version to have ptr inside EAX, to fix it. 
asm( "mov %0,%%" AX "\n\t" "mov %1, (%%" AX ")": : "r"(ptr), "r"(data): AX); } int try_get(unsigned int * ptr) { int ret; //asm("mov (%1), %0": "=r"(ret): "r"(ptr)); asm( "mov %1,%%" AX "\n\t" "mov (%%" AX "), %0": "=r"(ret): "r"(ptr): AX); return ret; } #if __x86_64__ # define REG_AX REG_RAX #else # define REG_AX REG_EAX #endif void segv_handle(int sig, __attribute((unused)) siginfo_t * unused, void * ucontext) { ucontext_t * regs = ucontext; if (sig != SIGSEGV) printf("Hey! I'm a SIGSEGV handler, not an handler for %s", strsignal(sig)); else { segv_happened++; //We assume the pointer on which we had the fault is in EAX, to //get this you must use try_set and try_get. regs->uc_mcontext.gregs[REG_AX] = (int)&dummy; if (segv_happened > 10) { printf("SEGV loop detected, exiting.\n"); if (segv_expected == -1) printf("As I said, fix the source.\n"); exit(1); } if (segv_expected == -1) { printf("Unbracketed SEGV, fix your sources!\a\n"); } else if (segv_expected == 0) { printf("Unexpected SEGV, fix your kernel!\a\n"); global_ret++; exit(1); } else if (segv_expected == 1) { #if CONFIG_VERBOSE printf(" **Expected SEGV, don't worry.** "); #endif } } } /* ret_expected is for read faults, to set the value we expect to read (this * will avoid the program exiting for correctness check). */ void __segv_start(int expected, int ret_expected) { if (segv_expected != -1) { printf("Unclosed segv_start!\n Fix your sources."); } segv_happened = 0; segv_expected = expected; dummy = ret_expected; fflush(stdout); } /*Only for write faults*/ #define segv_start(exp) __segv_start(exp, 0) void segv_end(void) { if (!segv_happened && segv_expected) { printf(" **SIGSEGV happened? %s; it was %sexpected** \n\a", segv_happened ? "Yes" : "No", segv_expected ? "" : "un"); global_ret++; } fflush(stdout); segv_expected = -1; /* Avoid getting correct return codes just by chance. 
*/ dummy = -2; } static char cache_contents [CACHE_SIZE]; int mincore_debug(void *ptr, char * str) { unsigned char c = 0; mincore(ptr, PAGE_SIZE, &c); c &= 0x1; printf("%s: incore %d\n", str, c); return c; } static inline void __check_err(int err, void *ptr, int i, int line) { if (err) { perror(""); printf("remap_file_pages() of page %d failed, ptr = %p, err = %d, line %d!\n", i, ptr, err, line); exit(1); } } #define CHECK_ERR(err, ptr, i) __check_err(err, ptr, i, __LINE__) int main(void) { unsigned int *data = NULL; int i, j; int val; int err; /* Number of iterations. */ int offset = CONFIG_ITER_NUM + CONFIG_BASE_OFFSET; atexit(unlink_file); /* Signal handler */ struct sigaction sa = { .sa_sigaction = segv_handle, .sa_flags = SA_SIGINFO, }; sigemptyset(&sa.sa_mask); sigaction(SIGSEGV, &sa, NULL); signal(SIGINT, unlink_file_sig); signal(SIGTERM, unlink_file_sig); /* Create buffer for swapping */ buf = malloc(SWAP_SIZE + 1); if (!buf) { printf("Failed memory allocation!\n"); return 1; } #if !CONFIG_REUSE_FILE /* Create the cache file to be mapped in memory */ #if CONFIG_TMP_ON_DISK strcpy(filename, "./cacheXXXXXX"); #else strcpy(filename, "/dev/shm/cacheXXXXXX"); #endif fd = mkstemp(filename); if (fd < 0) { perror("death"); printf("could not open cachefile!\n"); exit(1); } /* Fill the cache file */ for (i = 0; i < CACHE_PAGES; i++) { int *page = (int *) (cache_contents + i*PAGE_SIZE); for (j = 0; j < PAGE_WORDS; j++) page[j] = i; } if (write(fd, cache_contents, CACHE_SIZE) != CACHE_SIZE) { perror("death"); printf("could not write cachefile!\n"); exit(1); } fsync(fd); posix_fadvise(fd, 0, CACHE_SIZE, POSIX_FADV_DONTNEED); close(fd); #else ///XXX: correct this to the correct name! strcpy(filename, "cachep4hgDS"); #endif sprintf(buf, "ls -lh %s", filename); system(buf); /* Remap it in memory. */ fd = open(filename, O_RDWR); //fd = open(filename, O_RDONLY); #if CONFIG_DEBUG_PRIVATE ptr = mmap(0, WINDOW_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); printf("ptr is %p. 
", ptr); //posix_fadvise(fd, 0, CACHE_SIZE, POSIX_FADV_DONTNEED); //madvise(ptr, WINDOW_SIZE, MADV_DONTNEED); printf("Setting ptr to 2\n"); *ptr = 2; printf("Now *ptr is %d\n", *ptr); try_swap_out(); mincore_debug(ptr, "ptr"); printf("Now *ptr is %d\n", *ptr); printf("Over-remapping PTE of ptr; we should now get -EEXIST\n"); //Just to install the PTE. errno = 0; err = remap_file_pages(ptr, PAGE_SIZE, 0, 0, NONBLOCK_FLAGS); //Plain old call, not using MAP_CHGPROT!!! Otherwise I'd use sys_remap_file_pages if (!err) { printf("Over-remapping wrongly succeeded\a\n"); global_ret++; } else if (errno != EEXIST) { printf("Over-remapping failed with unexpected error code\a\n"); global_ret++; } if (err) { perror("MAP_PRIVATE"); printf("remap_file_pages() failed, ptr = %p!\n", ptr); } printf("Now ptr is %p, *ptr is %d\n", ptr, *ptr); if (*ptr != 2) { printf("Read wrong value.\a\n"); global_ret++; } munmap(ptr, WINDOW_SIZE); #endif ptr = mmap(0, WINDOW_SIZE, PROT2, MAP_SHARED #if CONFIG_POPULATE | MAP_POPULATE | NONBLOCK_FLAGS #endif , fd, 0); if (ptr == MAP_FAILED) { printf("mmap() failed, ptr = %p!\n", ptr); exit(1); } /* Remap first page, to fault in second with the VM_NONUNIFORM - non * remapped fault path.*/ #if CONFIG_TEST_DEFAULT_NONUNIFORM err = sys_remap_file_pages((unsigned long)ptr, PAGE_SIZE, PROT_NONE, 0, NONBLOCK_FLAGS); #else err = 0; #endif CHECK_ERR(err, ptr, 0); #define __PAGE_IDX 2 data = (unsigned int *) (ptr + __PAGE_IDX * PAGE_SIZE); mincore_debug(data, "data[4096]"); try_swap_out(); printf("Now reading second page, with default prots: %p\n", data); #if !CONFIG_WRITE_BEFORE_READ && !CONFIG_TEST_DEFAULT_NONUNIFORM && CONFIG_TEST_WRITE_IMPLIES_READ && !CONFIG_WANT_READ_FAULTS printf("NOTE: in this compilation environment, we shouldn't expect " "a fault on read, but i386 and UML always did it; PROT_WRITE " "without PROT_READ gives read faults on missing page. 
This " "doesn't happen with remap_file_pages(), but we're not " "using it right now.\n"); #endif //Since we mapped the page with PROT_WRITE only, we need to do a write //fault since it's a normal page. #if CONFIG_WRITE_BEFORE_READ segv_start(CONFIG_WANT_WRITE_FAULTS || CONFIG_WANT_READ_FAULTS); try_set(data + 1, __PAGE_IDX); segv_end(); #endif __segv_start(CONFIG_WANT_READ_FAULTS, __PAGE_IDX); val = try_get(data); printf("Ok. Data at data[%d]: %d", PAGE_SIZE, val); segv_end(); if (val != __PAGE_IDX) { printf("\nRead wrong value with PROT2 and single-page linear mapping, found %d expected 1" ", bug in program's fault handling\n", val); exit(1); } else { printf(" as expected\n"); } printf("Setting data[%d]...", PAGE_SIZE); segv_start(CONFIG_WANT_WRITE_FAULTS || CONFIG_WANT_READ_FAULTS); try_set(data, val); segv_end(); printf("Ok\n"); munmap(ptr, WINDOW_SIZE); ptr = mmap(0, WINDOW_SIZE, PROT, MAP_SHARED #if CONFIG_POPULATE | MAP_POPULATE | NONBLOCK_FLAGS #endif , fd, 0); if (ptr == MAP_FAILED) { printf("mmap() failed, ptr = %p!\n", ptr); exit(1); } data = (unsigned int *) ptr; #if 1 //Now, if we use a "0" pgoffset, then it shouldn't work. No, it works //because the page is found in the page cache, and we're not doing //nonblock. But with try_swap_out (and maybe even the above fadvise() is //needed), I get the expected misbehaviour. try_swap_out(); err = mincore_debug(ptr, "ptr"); if (!err) printf("On a buggy kernel, this (incore = 0) will cause " "an unexpected success. " "We expect instead a segmentation fault.\n"); else printf("Going to segfault\n"); err = sys_remap_file_pages((unsigned long)ptr, PAGE_SIZE, PROT_NONE, 0, MAP_NONBLOCK); CHECK_ERR(err, ptr, 0); __segv_start(1, 0); val = try_get(data); printf("Ok. 
Data at ptr: %d", val); segv_end(); if (val != 0) { printf("\nRead wrong value with PROT_NONE and single-page-mapping, found %d expected 0" ", bug in program's fault handling\n", val); exit(1); } else { printf(" as expected\n"); } #endif printf("data mapping: %p\n", data); printf("mapping pages in reverse order via remap_file_pages():\n"); again: //This just changes the protection of the first page. //When the area was mapped PROT_READ|PROT_WRITE, here we write-protect //it here. //But, on 2.6.4-rc2-mm1, it doesn't work, even if we try_swap_out here //because then we'll probably just get two faults in sequence, and still //no SIGSEGV. //It seems that we cannot reduce permissions, just increase them. In //fact we don't check anything when satisfying a write fault on a //read-only page. #if CONFIG_ITER_NUM + CONFIG_BASE_OFFSET >= CACHE_PAGES #warning Bad configuration, going to have problems, increase CACHE_PAGES or decrease CONFIG_ITER_NUM + CONFIG_BASE_OFFSET! #endif #if CONFIG_LINEAR i = 0; #else i = offset; #endif err = sys_remap_file_pages((unsigned long)ptr, PAGE_SIZE, PROT2, i, NONBLOCK_FLAGS); CHECK_ERR(err, ptr, i); //try_swap_out(); printf("Reading data..."); __segv_start(CONFIG_WANT_READ_FAULTS, i); val = try_get(data); segv_end(); printf("Ok. Data at ptr: %d", val); if (val != i) { printf("\nMapped wrong value with PROT2 and single-page-mapping, found %d expected %d\n", val, i); exit(1); } else { printf(" as expected\n"); } printf("Setting data..."); segv_start(!(PROT2 & PROT_WRITE)); try_set(data, i); segv_end(); printf("OK\n"); #if CONFIG_ITER_NUM + CONFIG_BASE_OFFSET + 1 + WINDOW_PAGES >= CACHE_PAGES #warning Bad configuration, going to have problems later, increase CACHE_PAGES or decrease CONFIG_ITER_NUM + CONFIG_BASE_OFFSET! 
#endif for (i = 0; i < WINDOW_PAGES; i += 2) { unsigned *page = data + i*(PAGE_SIZE/sizeof(int)); #if CONFIG_LINEAR j = i; #else j = WINDOW_PAGES-i+offset; #endif err = sys_remap_file_pages( page, PAGE_SIZE * 2, PROT3, j, NONBLOCK_FLAGS); CHECK_ERR(err, ptr, j); #if CONFIG_VERBOSE printf("remapped memory page %d, file page offset %d, " "to address %p.\n", i, j, page); #endif } #if CONFIG_MADVISE_DONTNEED madvise(ptr, WINDOW_SIZE, MADV_DONTNEED); #endif #if CONFIG_TRUNCATE_TEST ftruncate(fd, 0); lseek(fd, 0, SEEK_SET); if (write(fd, cache_contents, CACHE_SIZE) != CACHE_SIZE) { perror("death"); printf("could not write cachefile!\n"); exit(1); } #endif try_swap_out(); printf("page contents:\n"); for (i = 0; i < WINDOW_PAGES; i++) { int index = i*(PAGE_SIZE/sizeof(int)); unsigned char c = 0; mincore(data+index, PAGE_SIZE, &c); mincore(data+index, PAGE_SIZE, &c); printf(".. data[%d] (page %d/%p, incore %d): ", index, i, data + index, c & 0x1); /* * Double-check the correctness of the mapping: * the +2 on odd values of "i" is because we remap two pages at * a time. */ if (CONFIG_LINEAR) { j = i; } else { if (i & 1) { j = WINDOW_PAGES-i+offset+2; } else { j = WINDOW_PAGES-i+offset; } } __segv_start(CONFIG_WANT_READ_FAULTS, j); val = try_get(&data[index]); segv_end(); printf("%d - ", val); if (val != j) { printf("hm, mapped incorrect data, found %d expected %d!\n", val, j); exit(1); } //Touch the page! #if 0 data[index] = WINDOW_PAGES-i+offset+2; #else segv_start(CONFIG_WANT_WRITE_FAULTS || CONFIG_WANT_READ_FAULTS); try_set(&data[index], j); segv_end(); #endif printf("OK.\n"); } if (--offset >= CONFIG_BASE_OFFSET) { goto again; } fprintf(stderr, "There have been %d errors with SIGSEGVs in excess or missing.\n", global_ret); return global_ret; }