Merge branch 'x86/mm' into x86/asm, to pick up pending changes
Concentrate x86 MM and asm related changes into a single super-topic, in preparation for larger changes.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 75ec4eb3dc
@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
@@ -302,7 +302,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config KASAN_SHADOW_OFFSET
	hex
	depends on KASAN
-	default 0xdff8000000000000 if X86_5LEVEL
	default 0xdffffc0000000000

config HAVE_INTEL_TXT
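For context (not part of the diff): generic KASAN maps every 8 bytes of address space to one shadow byte, so a shadow address is (addr >> 3) + KASAN_SHADOW_OFFSET. The stand-alone sketch below is my illustration, not kernel code; it shows why the single offset 0xdffffc0000000000 can now serve both paging modes, landing the shadow of the 4-level kernel half at ffffec0000000000 and the shadow of the 5-level kernel half at ffdffc0000000000, inside the regions listed in the documentation hunk above.

/* Illustrative only: mirrors the kasan_mem_to_shadow() formula as a
 * self-contained user-space program, not the kernel implementation. */
#include <stdio.h>

#define KASAN_SHADOW_OFFSET      0xdffffc0000000000UL
#define KASAN_SHADOW_SCALE_SHIFT 3	/* 8 bytes of memory per shadow byte */

static unsigned long mem_to_shadow(unsigned long addr)
{
	return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

int main(void)
{
	/* First kernel address with 4-level paging (47-bit kernel half). */
	printf("%lx\n", mem_to_shadow(0xffff800000000000UL)); /* ffffec0000000000 */
	/* First kernel address with 5-level paging (56-bit kernel half). */
	printf("%lx\n", mem_to_shadow(0xff00000000000000UL)); /* ffdffc0000000000 */
	return 0;
}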
@@ -24,7 +24,8 @@ endif
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
-KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_paravirt.o := n

OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -37,11 +37,12 @@
*
*/

#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
#endif
L3_START_KERNEL = pud_index(__START_KERNEL_map)

.text
@@ -361,10 +362,7 @@ NEXT_PAGE(early_dynamic_pgts)

.data

-#ifndef CONFIG_XEN
-NEXT_PAGE(init_top_pgt)
-.fill 512,8,0
-#else
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0

@@ -381,6 +379,9 @@ NEXT_PAGE(level2_ident_pgt)
* Don't set NX because code runs from these pages.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+#else
+NEXT_PAGE(init_top_pgt)
+.fill 512,8,0
#endif

#ifdef CONFIG_X86_5LEVEL
@@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)

#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
void register_page_bootmem_memmap(unsigned long section_nr,
-struct page *start_page, unsigned long size)
+struct page *start_page, unsigned long nr_pages)
{
unsigned long addr = (unsigned long)start_page;
-unsigned long end = (unsigned long)(start_page + size);
+unsigned long end = (unsigned long)(start_page + nr_pages);
unsigned long next;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
-unsigned int nr_pages;
+unsigned int nr_pmd_pages;
struct page *page;

for (; addr < end; addr = next) {

@@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
if (pmd_none(*pmd))
continue;

-nr_pages = 1 << (get_order(PMD_SIZE));
+nr_pmd_pages = 1 << get_order(PMD_SIZE);
page = pmd_page(*pmd);
-while (nr_pages--)
+while (nr_pmd_pages--)
get_page_bootmem(section_nr, page++,
SECTION_INFO);
}
@@ -15,6 +15,8 @@

extern struct range pfn_mapped[E820_MAX_ENTRIES];

+static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);

static int __init map_range(struct range *range)
{
unsigned long start;

@@ -30,8 +32,10 @@ static void __init clear_pgds(unsigned long start,
unsigned long end)
{
pgd_t *pgd;
+/* See comment in kasan_init() */
+unsigned long pgd_end = end & PGDIR_MASK;

-for (; start < end; start += PGDIR_SIZE) {
+for (; start < pgd_end; start += PGDIR_SIZE) {
pgd = pgd_offset_k(start);
/*
* With folded p4d, pgd_clear() is nop, use p4d_clear()
@@ -42,29 +46,61 @@ static void __init clear_pgds(unsigned long start,
else
pgd_clear(pgd);
}

+pgd = pgd_offset_k(start);
+for (; start < end; start += P4D_SIZE)
+p4d_clear(p4d_offset(pgd, start));
}

+static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
+{
+unsigned long p4d;

+if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+return (p4d_t *)pgd;

+p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+p4d += __START_KERNEL_map - phys_base;
+return (p4d_t *)p4d + p4d_index(addr);
+}

+static void __init kasan_early_p4d_populate(pgd_t *pgd,
+unsigned long addr,
+unsigned long end)
+{
+pgd_t pgd_entry;
+p4d_t *p4d, p4d_entry;
+unsigned long next;

+if (pgd_none(*pgd)) {
+pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
+set_pgd(pgd, pgd_entry);
+}

+p4d = early_p4d_offset(pgd, addr);
+do {
+next = p4d_addr_end(addr, end);

+if (!p4d_none(*p4d))
+continue;

+p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
+set_p4d(p4d, p4d_entry);
+} while (p4d++, addr = next, addr != end && p4d_none(*p4d));
+}

static void __init kasan_map_early_shadow(pgd_t *pgd)
{
-int i;
-unsigned long start = KASAN_SHADOW_START;
+/* See comment in kasan_init() */
+unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
unsigned long end = KASAN_SHADOW_END;
+unsigned long next;

-for (i = pgd_index(start); start < end; i++) {
-switch (CONFIG_PGTABLE_LEVELS) {
-case 4:
-pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
-_KERNPG_TABLE);
-break;
-case 5:
-pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
-_KERNPG_TABLE);
-break;
-default:
-BUILD_BUG();
-}
-start += PGDIR_SIZE;
-}
+pgd += pgd_index(addr);
+do {
+next = pgd_addr_end(addr, end);
+kasan_early_p4d_populate(pgd, addr, next);
+} while (pgd++, addr = next, addr != end);
}

#ifdef CONFIG_KASAN_INLINE
@@ -101,7 +137,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);

-for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
+for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);

kasan_map_early_shadow(early_top_pgt);

@@ -117,12 +153,35 @@ void __init kasan_init(void)
#endif

memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));

+/*
+ * We use the same shadow offset for 4- and 5-level paging to
+ * facilitate boot-time switching between paging modes.
+ * As result in 5-level paging mode KASAN_SHADOW_START and
+ * KASAN_SHADOW_END are not aligned to PGD boundary.
+ *
+ * KASAN_SHADOW_START doesn't share PGD with anything else.
+ * We claim whole PGD entry to make things easier.
+ *
+ * KASAN_SHADOW_END lands in the last PGD entry and it collides with
+ * bunch of things like kernel code, modules, EFI mapping, etc.
+ * We need to take extra steps to not overwrite them.
+ */
+if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+void *ptr;

+ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
+set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
+__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
+}

load_cr3(early_top_pgt);
__flush_tlb_all();

-clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);

-kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
kasan_mem_to_shadow((void *)PAGE_OFFSET));

for (i = 0; i < E820_MAX_ENTRIES; i++) {
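A quick aside (mine, not part of the commit) on the PGDIR_MASK rounding used above: with 5-level paging a PGD entry covers 2^48 bytes, and the KASAN shadow start derived from the shared offset is not PGD-aligned, so kasan_init()/clear_pgds() round it down to claim the whole PGD entry. The self-contained sketch below hard-codes an assumed PGDIR_SHIFT of 48 and the shadow start computed earlier, purely to illustrate the arithmetic.

/* Illustration only: user-space arithmetic, not kernel code. */
#include <stdio.h>

#define PGDIR_SHIFT_5LVL 48				/* assumed: one PGD entry covers 256 TiB */
#define PGDIR_MASK_5LVL  (~((1UL << PGDIR_SHIFT_5LVL) - 1))

int main(void)
{
	unsigned long kasan_shadow_start = 0xffdffc0000000000UL;	/* derived above */

	/* Rounds down to 0xffdf000000000000, the PGD-aligned start that the
	 * memory-map documentation hunk now lists for the KASAN shadow. */
	printf("%lx\n", kasan_shadow_start & PGDIR_MASK_5LVL);
	return 0;
}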
@@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);

-#if CONFIG_PGTABLE_LEVELS == 4
+#ifdef CONFIG_X86_64
__visible pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);

@@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)

xen_mc_issue(PARAVIRT_LAZY_MMU);
}
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+#endif /* CONFIG_X86_64 */

static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
bool last, unsigned long limit)
{
-int i, nr, flush = 0;
+int flush = 0;
+pud_t *pud;

-nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
-for (i = 0; i < nr; i++) {
-pud_t *pud;

-if (p4d_none(p4d[i]))
-continue;
+if (p4d_none(*p4d))
+return flush;

-pud = pud_offset(&p4d[i], 0);
-if (PTRS_PER_PUD > 1)
-flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-flush |= xen_pud_walk(mm, pud, func,
-last && i == nr - 1, limit);
-}
+pud = pud_offset(p4d, 0);
+if (PTRS_PER_PUD > 1)
+flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+flush |= xen_pud_walk(mm, pud, func, last, limit);
return flush;
}

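Background for the hunk above (my summary, not text from the commit): without CONFIG_X86_5LEVEL the p4d level is folded, so PTRS_PER_P4D is 1 and a pgd entry doubles as the p4d entry; that is what lets the loop over p4d[i] collapse to a single *p4d check once the XEN_PV code stops carrying 5-level support. A minimal sketch of the folded-level idea, loosely modeled on the generic no-p4d helpers (simplified, not copied from the kernel headers):

/* Illustrative sketch of a "folded" page-table level; not the kernel's
 * pgtable-nop4d.h, just the idea behind it. */
#define PTRS_PER_P4D_FOLDED 1

typedef struct { unsigned long pgd; } pgd_t;
typedef struct { pgd_t pgd; } p4d_t;	/* a p4d entry is just the pgd entry */

static inline p4d_t *p4d_offset_folded(pgd_t *pgd, unsigned long addr)
{
	(void)addr;			/* only one entry, addr does not matter */
	return (p4d_t *)pgd;
}

static inline int p4d_none_folded(p4d_t p4d)
{
	(void)p4d;			/* the folded entry is never "none" */
	return 0;
}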
@@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
continue;

p4d = p4d_offset(&pgd[i], 0);
-if (PTRS_PER_P4D > 1)
-flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
}

@@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
{
pgd_t *pgd;
p4d_t *p4d;
-unsigned int i;
bool unpin;

unpin = (vaddr == 2 * PGDIR_SIZE);
vaddr &= PMD_MASK;
pgd = pgd_offset_k(vaddr);
p4d = p4d_offset(pgd, 0);
-for (i = 0; i < PTRS_PER_P4D; i++) {
-if (p4d_none(p4d[i]))
-continue;
-xen_cleanmfnmap_p4d(p4d + i, unpin);
-}
-if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-set_pgd(pgd, __pgd(0));
-xen_cleanmfnmap_free_pgtbl(p4d, unpin);
-}
+if (!p4d_none(*p4d))
+xen_cleanmfnmap_p4d(p4d, unpin);
}

static void __init xen_pagetable_p2m_free(void)
@@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn)
xen_release_ptpage(pfn, PT_PMD);
}

-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PUD);

@@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
*/
void __init xen_relocate_p2m(void)
{
-phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
+phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
-int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
+int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
pte_t *pt;
pmd_t *pmd;
pud_t *pud;
-p4d_t *p4d = NULL;
pgd_t *pgd;
unsigned long *new_p2m;
int save_pud;

@@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void)
n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
-if (PTRS_PER_P4D > 1)
-n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
-else
-n_p4d = 0;
-n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
+n_frames = n_pte + n_pt + n_pmd + n_pud;

new_area = xen_find_free_area(PFN_PHYS(n_frames));
if (!new_area) {
@@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void)
* To avoid any possible virtual address collision, just use
* 2 * PUD_SIZE for the new area.
*/
-p4d_phys = new_area;
-pud_phys = p4d_phys + PFN_PHYS(n_p4d);
+pud_phys = new_area;
pmd_phys = pud_phys + PFN_PHYS(n_pud);
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;

pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
-idx_p4d = 0;
-save_pud = n_pud;
-do {
-if (n_p4d > 0) {
-p4d = early_memremap(p4d_phys, PAGE_SIZE);
-clear_page(p4d);
-n_pud = min(save_pud, PTRS_PER_P4D);
-}
-for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
-pud = early_memremap(pud_phys, PAGE_SIZE);
-clear_page(pud);
-for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
-idx_pmd++) {
-pmd = early_memremap(pmd_phys, PAGE_SIZE);
-clear_page(pmd);
-for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
-idx_pt++) {
-pt = early_memremap(pt_phys, PAGE_SIZE);
-clear_page(pt);
-for (idx_pte = 0;
-idx_pte < min(n_pte, PTRS_PER_PTE);
-idx_pte++) {
-set_pte(pt + idx_pte,
-pfn_pte(p2m_pfn, PAGE_KERNEL));
-p2m_pfn++;
-}
-n_pte -= PTRS_PER_PTE;
-early_memunmap(pt, PAGE_SIZE);
-make_lowmem_page_readonly(__va(pt_phys));
-pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
-PFN_DOWN(pt_phys));
-set_pmd(pmd + idx_pt,
-__pmd(_PAGE_TABLE | pt_phys));
-pt_phys += PAGE_SIZE;
+for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+pud = early_memremap(pud_phys, PAGE_SIZE);
+clear_page(pud);
+for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+idx_pmd++) {
+pmd = early_memremap(pmd_phys, PAGE_SIZE);
+clear_page(pmd);
+for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+idx_pt++) {
+pt = early_memremap(pt_phys, PAGE_SIZE);
+clear_page(pt);
+for (idx_pte = 0;
+idx_pte < min(n_pte, PTRS_PER_PTE);
+idx_pte++) {
+set_pte(pt + idx_pte,
+pfn_pte(p2m_pfn, PAGE_KERNEL));
+p2m_pfn++;
}
-n_pt -= PTRS_PER_PMD;
-early_memunmap(pmd, PAGE_SIZE);
-make_lowmem_page_readonly(__va(pmd_phys));
-pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
-PFN_DOWN(pmd_phys));
-set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
-pmd_phys += PAGE_SIZE;
+n_pte -= PTRS_PER_PTE;
+early_memunmap(pt, PAGE_SIZE);
+make_lowmem_page_readonly(__va(pt_phys));
+pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+PFN_DOWN(pt_phys));
+set_pmd(pmd + idx_pt,
+__pmd(_PAGE_TABLE | pt_phys));
+pt_phys += PAGE_SIZE;
}
-n_pmd -= PTRS_PER_PUD;
-early_memunmap(pud, PAGE_SIZE);
-make_lowmem_page_readonly(__va(pud_phys));
-pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
-if (n_p4d > 0)
-set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
-else
-set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
-pud_phys += PAGE_SIZE;
+n_pt -= PTRS_PER_PMD;
+early_memunmap(pmd, PAGE_SIZE);
+make_lowmem_page_readonly(__va(pmd_phys));
+pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+PFN_DOWN(pmd_phys));
+set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+pmd_phys += PAGE_SIZE;
}
-if (n_p4d > 0) {
-save_pud -= PTRS_PER_P4D;
-early_memunmap(p4d, PAGE_SIZE);
-make_lowmem_page_readonly(__va(p4d_phys));
-pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
-set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
-p4d_phys += PAGE_SIZE;
-}
-} while (++idx_p4d < n_p4d);
+n_pmd -= PTRS_PER_PUD;
+early_memunmap(pud, PAGE_SIZE);
+make_lowmem_page_readonly(__va(pud_phys));
+pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+pud_phys += PAGE_SIZE;
+}

/* Now copy the old p2m info to the new area. */
memcpy(new_p2m, xen_p2m_addr, size);
@@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void)
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
pv_mmu_ops.set_p4d = xen_set_p4d;
#endif

@@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void)
pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
pv_mmu_ops.release_pte = xen_release_pte;
pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
pv_mmu_ops.alloc_pud = xen_alloc_pud;
pv_mmu_ops.release_pud = xen_release_pud;
#endif

@@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),

-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
.set_p4d = xen_set_p4d_hyper,

.alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init,
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+#endif /* CONFIG_X86_64 */

.activate_mm = xen_activate_mm,
.dup_mmap = xen_dup_mmap,
@@ -2495,7 +2495,7 @@ void vmemmap_populate_print_last(void);
void vmemmap_free(unsigned long start, unsigned long end);
#endif
void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
-unsigned long size);
+unsigned long nr_pages);

enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
@@ -1150,13 +1150,17 @@ struct mem_section {
#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)

#ifdef CONFIG_SPARSEMEM_EXTREME
-extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+extern struct mem_section **mem_section;
#else
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

static inline struct mem_section *__nr_to_section(unsigned long nr)
{
+#ifdef CONFIG_SPARSEMEM_EXTREME
+if (!mem_section)
+return NULL;
+#endif
if (!mem_section[SECTION_NR_TO_ROOT(nr)])
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
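To make the two-level lookup above concrete (my illustration, not text from the commit): a section number is split into a root index, SECTION_NR_TO_ROOT(nr) = nr / SECTIONS_PER_ROOT, and an offset, nr & SECTION_ROOT_MASK, so switching the root table to a runtime-allocated array of pointers only changes where the first dimension lives. The sketch below hard-codes an assumed SECTIONS_PER_ROOT of 128 purely for the example; in the kernel it is PAGE_SIZE / sizeof(struct mem_section).

/* Illustrative only: the root/offset arithmetic behind __nr_to_section(). */
#include <stdio.h>

#define SECTIONS_PER_ROOT      128			/* assumed for the example */
#define SECTION_NR_TO_ROOT(nr) ((nr) / SECTIONS_PER_ROOT)
#define SECTION_ROOT_MASK      (SECTIONS_PER_ROOT - 1)

int main(void)
{
	unsigned long nr = 1000;

	/* Prints "root 7, index 104". */
	printf("root %lu, index %lu\n",
	       SECTION_NR_TO_ROOT(nr), nr & SECTION_ROOT_MASK);
	return 0;
}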
mm/gup.c
@@ -1643,6 +1643,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1;
}

+static void gup_pgd_range(unsigned long addr, unsigned long end,
+int write, struct page **pages, int *nr)
+{
+unsigned long next;
+pgd_t *pgdp;

+pgdp = pgd_offset(current->mm, addr);
+do {
+pgd_t pgd = READ_ONCE(*pgdp);

+next = pgd_addr_end(addr, end);
+if (pgd_none(pgd))
+return;
+if (unlikely(pgd_huge(pgd))) {
+if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+pages, nr))
+return;
+} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+PGDIR_SHIFT, next, write, pages, nr))
+return;
+} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+return;
+} while (pgdp++, addr = next, addr != end);
+}

+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+unsigned long len, end;

+len = (unsigned long) nr_pages << PAGE_SHIFT;
+end = start + len;
+return end >= start;
+}
+#endif

/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.

@@ -1650,10 +1691,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
-struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
-unsigned long next, flags;
-pgd_t *pgdp;
+unsigned long flags;
int nr = 0;

start &= PAGE_MASK;
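The #ifndef gup_fast_permitted default added above only rejects ranges that wrap around; an architecture is expected to provide a stricter check by defining the symbol before mm/gup.c sees it. A hypothetical override might look like the sketch below; the helper name, the TASK_SIZE_MAX bound and the header placement are my assumptions for illustration, not the actual x86 implementation.

/* Hypothetical arch override, e.g. in an asm/pgtable.h-style header.
 * Defining the macro makes mm/gup.c skip its #ifndef-guarded default. */
static inline bool arch_gup_fast_permitted(unsigned long start, int nr_pages,
					   int write)
{
	unsigned long len = (unsigned long)nr_pages << PAGE_SHIFT;
	unsigned long end = start + len;

	(void)write;			/* unused in this sketch */
	return end >= start && end <= TASK_SIZE_MAX;
}
#define gup_fast_permitted arch_gup_fast_permitted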
@@ -1677,45 +1716,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
* block IPIs that come from THPs splitting.
*/

-local_irq_save(flags);
-pgdp = pgd_offset(mm, addr);
-do {
-pgd_t pgd = READ_ONCE(*pgdp);

-next = pgd_addr_end(addr, end);
-if (pgd_none(pgd))
-break;
-if (unlikely(pgd_huge(pgd))) {
-if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-pages, &nr))
-break;
-} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-PGDIR_SHIFT, next, write, pages, &nr))
-break;
-} else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-break;
-} while (pgdp++, addr = next, addr != end);
-local_irq_restore(flags);
+if (gup_fast_permitted(start, nr_pages, write)) {
+local_irq_save(flags);
+gup_pgd_range(addr, end, write, pages, &nr);
+local_irq_restore(flags);
+}

return nr;
}

-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
-unsigned long len, end;

-len = (unsigned long) nr_pages << PAGE_SHIFT;
-end = start + len;
-return end >= start;
-}
-#endif

/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
@@ -1735,12 +1744,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
+unsigned long addr, len, end;
int nr = 0, ret = 0;

start &= PAGE_MASK;
+addr = start;
+len = (unsigned long) nr_pages << PAGE_SHIFT;
+end = start + len;

+if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+(void __user *)start, len)))
+return 0;

if (gup_fast_permitted(start, nr_pages, write)) {
-nr = __get_user_pages_fast(start, nr_pages, write, pages);
+local_irq_disable();
+gup_pgd_range(addr, end, write, pages, &nr);
+local_irq_enable();
ret = nr;
}

@@ -5646,6 +5646,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
unsigned long start_pfn, end_pfn;
int i, this_nid;

+#ifdef CONFIG_SPARSEMEM_EXTREME
+if (!mem_section) {
+unsigned long size, align;

+size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+align = 1 << (INTERNODE_CACHE_SHIFT);
+mem_section = memblock_virt_alloc(size, align);
+}
+#endif

for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
memory_present(this_nid, start_pfn, end_pfn);
}
mm/sparse.c
@@ -22,8 +22,7 @@
* 1) mem_section - memory sections, mem_map's for valid memory
*/
#ifdef CONFIG_SPARSEMEM_EXTREME
-struct mem_section *mem_section[NR_SECTION_ROOTS]
-____cacheline_internodealigned_in_smp;
+struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
____cacheline_internodealigned_in_smp;

@@ -100,7 +99,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
int __section_nr(struct mem_section* ms)
{
unsigned long root_nr;
-struct mem_section* root;
+struct mem_section *root = NULL;

for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);

@@ -111,7 +110,7 @@ int __section_nr(struct mem_section* ms)
break;
}

-VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+VM_BUG_ON(!root);

return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}

@@ -329,11 +328,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
unsigned long usemap_snr, pgdat_snr;
-static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
-static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+static unsigned long old_usemap_snr;
+static unsigned long old_pgdat_snr;
struct pglist_data *pgdat = NODE_DATA(nid);
int usemap_nid;

+/* First call */
+if (!old_usemap_snr) {
+old_usemap_snr = NR_MEM_SECTIONS;
+old_pgdat_snr = NR_MEM_SECTIONS;
+}

usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
if (usemap_snr == pgdat_snr)