On 2023/5/11 16:19, HAGIO KAZUHITO(萩尾 一仁) wrote:
On 2023/05/10 20:39, Rongwei Wang wrote:
> Hi, Kazu
>
> As we discussed in previous email, I re-code this patch under x86_64 and arm64 env.
>
> The change log as belows.
>
> change log:
> v1->v2:
> - rename and move zero_paddr and huge_zero_paddr into vm_init();
> - simplify fprintf() code block;
> - support x86_64 arch;
Thank you for the update.
> And if I miss something, please let me know.
>
>
> Thanks,
> -wrw
>
> On 2023/5/10 19:32, Rongwei Wang wrote:
>> Now vtop can not show us the page is zero pfn
>> when PTE or PMD has attached ZERO PAGE. This
>> patch supports show this information directly
>> when using vtop, likes:
>>
>> crash> vtop -c 13674 ffff8917e000
>> VIRTUAL PHYSICAL
>> ffff8917e000 836e71000
>>
>> PAGE DIRECTORY: ffff000802f8d000
>> PGD: ffff000802f8dff8 => 884e29003
>> PUD: ffff000844e29ff0 => 884e93003
>> PMD: ffff000844e93240 => 840413003
>> PTE: ffff000800413bf0 => 160000836e71fc3
>> PAGE: 836e71000 (ZERO PAGE)
btw, how can I create this situation seeing zero page easily?
I also would like to test the patch.
Here a testcase that I used to allocate zero page:
z.c:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <ctype.h>
unsigned long *zero_data;
void read_only(void)
{
unsigned long i, val, count = 0;
printf("hello\n");
while (count++ < 10) {
for (i=0; i<(4 * 1024 * 1024) / sizeof(unsigned long);
i+=1024) {
val = zero_data[i];
}
}
return;
}
int main(int argc, char *argv[])
{
char c;
zero_data = (unsigned long *)malloc(1024 * 1024 * 4);
if (!zero_data) {
printf("malloc fail: exit\n");
return -1;
}
printf("zero: %lx\n", zero_data);
read_only();
c = getchar();
free(zero_data);
exit(EXIT_SUCCESS);
}
When you test in arm64 with 64k base pagesize, the size of allocating
memory is needed to set more than 512MB.
>> PTE PHYSICAL FLAGS
>> 160000836e71fc3 836e71000 (VALID|USER|RDONLY|SHARED|AF|NG|PXN|UXN|SPECIAL)
>>
>> VMA START END FLAGS FILE
>> ffff000844f51860 ffff8917c000 ffff8957d000 100073
>>
>> PAGE PHYSICAL MAPPING INDEX CNT FLAGS
>> fffffe001fbb9c40 836e71000 0 0 1 2ffffc000001000
reserved
a bit verbose, please replace these with "..."
Your mean comment as below is OK?
crash> vtop -c 13674 ffff8917e000
VIRTUAL PHYSICAL
ffff8917e000 836e71000
PAGE DIRECTORY: ffff000802f8d000
PGD: ffff000802f8dff8 => 884e29003
PUD: ffff000844e29ff0 => 884e93003
PMD: ffff000844e93240 => 840413003
PTE: ffff000800413bf0 => 160000836e71fc3
PAGE: 836e71000 (ZERO PAGE)
...
>> If huge page found:
>>
>> crash> vtop -c 14538 ffff95800000
>> VIRTUAL PHYSICAL
>> ffff95800000 910c00000
>>
>> PAGE DIRECTORY: ffff000801fa0000
>> PGD: ffff000801fa0ff8 => 884f53003
>> PUD: ffff000844f53ff0 => 8426cb003
>> PMD: ffff0008026cb560 => 60000910c00fc1
>> PAGE: 910c00000 (2MB, ZERO PAGE)
>>
>> PTE PHYSICAL FLAGS
>> 60000910c00fc1 910c00000 (VALID|USER|RDONLY|SHARED|AF|NG|PXN|UXN)
>>
>> VMA START END FLAGS FILE
>> ffff0000caa711e0 ffff956a9000 ffff95aaa000 100073
>>
>> PAGE PHYSICAL MAPPING INDEX CNT FLAGS
>> fffffe0023230000 910c00000 0 0 1 6ffffc000010000 head
>>
>> That seems be sensible with this patch.
Same as above.
>> Signed-off-by: Rongwei Wang <rongwei.wang(a)linux.alibaba.com>
>> ---
>> arm64.c | 26 +++++++++++++++++---------
>> defs.h | 6 ++++++
>> memory.c | 20 ++++++++++++++++++++
>> x86_64.c | 9 +++++----
>> 4 files changed, 48 insertions(+), 13 deletions(-)
>>
>> diff --git a/arm64.c b/arm64.c
>> index 56fb841..d70f202 100644
>> --- a/arm64.c
>> +++ b/arm64.c
>> @@ -419,7 +419,7 @@ arm64_init(int when)
>> /* use machdep parameters */
>> arm64_calc_phys_offset();
>> arm64_calc_physvirt_offset();
>> -
>> +
>> if (CRASHDEBUG(1)) {
>> if (machdep->flags & NEW_VMEMMAP)
>> fprintf(fp, "kimage_voffset: %lx\n",
>> @@ -1787,7 +1787,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t
*paddr, int verbose)
>> if ((pgd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
>> ulong sectionbase = (pgd_val & SECTION_PAGE_MASK_512MB) &
PHYS_MASK;
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
>> + fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase,
>> + IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" :
"");
>> arm64_translate_pte(pgd_val, 0, 0);
>> }
>> *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
>> @@ -1806,7 +1807,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t
*paddr, int verbose)
>> if (pte_val & PTE_VALID) {
>> *paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
>> + fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
>> + IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" :
"");
>> arm64_translate_pte(pte_val, 0, 0);
>> }
>> } else {
>> @@ -1859,7 +1861,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t
*paddr, int verbose)
>> if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
>> ulong sectionbase = PTE_TO_PHYS(pmd_val) &
SECTION_PAGE_MASK_512MB;
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
>> + fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase,
>> + IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" :
"");
>> arm64_translate_pte(pmd_val, 0, 0);
>> }
>> *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
>> @@ -1878,7 +1881,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t
*paddr, int verbose)
>> if (pte_val & PTE_VALID) {
>> *paddr = PTE_TO_PHYS(pte_val) + PAGEOFFSET(vaddr);
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
>> + fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
>> + IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" :
"");
>> arm64_translate_pte(pte_val, 0, 0);
>> }
>> } else {
>> @@ -1940,7 +1944,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t
*paddr, int verbose)
>> if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
>> ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) &
PHYS_MASK;
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase);
>> + fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase,
>> + IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" :
"");
>> arm64_translate_pte(pmd_val, 0, 0);
>> }
>> *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB);
>> @@ -1959,7 +1964,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t
*paddr, int verbose)
>> if (pte_val & PTE_VALID) {
>> *paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
>> + fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
>> + IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" :
"");
>> arm64_translate_pte(pte_val, 0, 0);
>> }
>> } else {
>> @@ -2029,7 +2035,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t
*paddr, int verbose)
>> if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
>> ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) &
PHYS_MASK;
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase);
>> + fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase,
>> + IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" :
"");
>> arm64_translate_pte(pmd_val, 0, 0);
>> }
>> *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB);
>> @@ -2048,7 +2055,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t
*paddr, int verbose)
>> if (pte_val & PTE_VALID) {
>> *paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
>> + fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
>> + IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" :
"");
>> arm64_translate_pte(pte_val, 0, 0);
>> }
>> } else {
>> diff --git a/defs.h b/defs.h
>> index 12ad6aa..1be5a68 100644
>> --- a/defs.h
>> +++ b/defs.h
>> @@ -2618,6 +2618,8 @@ struct vm_table { /* kernel VM-related data
*/
>> char *name;
>> } *pageflags_data;
>> ulong max_mem_section_nr;
>> + ulong zero_paddr;
>> + ulong huge_zero_paddr;
Please print these with "help -v", for example, here.
node_online_map_len: 16
node_online_map: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
zero_paddr: 10ef9000
huge_zero_paddr: fffffffffffff000
nr_vm_stat_items: 0
vm_stat_items: (not used)
OK, will do.
>> };
>> #define NODES (0x1)
>> @@ -2999,6 +3001,10 @@ struct load_module {
>> #define VIRTPAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask)
>> #define PHYSPAGEBASE(X) (((physaddr_t)(X)) &
(physaddr_t)machdep->pagemask)
>> +#define IS_ZEROPAGE(paddr) ((paddr) != ~0UL && \
>> + ((paddr) == vt->zero_paddr || \
>> + (paddr) == vt->huge_zero_paddr))
Ah, (paddr) != ~0UL would be unnecessary, if we use ~0UL as the invalid
address.
OK, will do.
>> +
>> /*
>> * Sparse memory stuff
>> * These must follow the definitions in the kernel mmzone.h
>> diff --git a/memory.c b/memory.c
>> index 0568f18..4ad5959 100644
>> --- a/memory.c
>> +++ b/memory.c
>> @@ -1208,6 +1208,26 @@ vm_init(void)
>> machdep->memory_size()));
>> vt->paddr_prlen = strlen(buf);
>> + if (kernel_symbol_exists("zero_pfn")) {
>> + ulong zero_pfn;
>> +
>> + vt->zero_paddr = ~0UL;
Please move this out of the if block to initialize it even if the symbol
does not exist.
OH, that's my fault, will do.
>> + if (readmem(symbol_value("zero_pfn"), KVADDR,
>> + &zero_pfn, sizeof(zero_pfn),
>> + "read zero_pfn", QUIET|RETURN_ON_ERROR))
>> + vt->zero_paddr = zero_pfn << PAGESHIFT();
>> + }
>> +
>> + if (kernel_symbol_exists("huge_zero_pfn")) {
>> + ulong huge_zero_pfn;
>> +
>> + vt->huge_zero_paddr = ~0UL;
Same as above.
>> + if (readmem(symbol_value("huge_zero_pfn"), KVADDR,
>> + &huge_zero_pfn, sizeof(huge_zero_pfn),
>> + "read huge_zero_pfn", QUIET|RETURN_ON_ERROR))
>> + vt->huge_zero_paddr = huge_zero_pfn << PAGESHIFT();
In kernel, huge_zero_pfn is initialized as ~0UL and set to a pfn when
get_huge_zero_page() is called.
crash> p -x huge_zero_pfn
huge_zero_pfn = $4 = 0xffffffffffffffff
so if huge_zero_pfn is ~0UL, vt->huge_zero_paddr should not be set.
Good catch.
How about following modification:
+ if (readmem(symbol_value("huge_zero_pfn"), KVADDR,
+ &huge_zero_pfn, sizeof(huge_zero_pfn),
+ "read huge_zero_pfn", QUIET|RETURN_ON_ERROR))
+ vt->huge_zero_paddr = (huge_zero_pfn != ~0UL) ?
(huge_zero_pfn << PAGESHIFT());
>> + }
>> +
>> if (vt->flags & PERCPU_KMALLOC_V1)
>> vt->dump_kmem_cache = dump_kmem_cache_percpu_v1;
>> else if (vt->flags & PERCPU_KMALLOC_V2)
>> diff --git a/x86_64.c b/x86_64.c
>> index 5019c69..693a08b 100644
>> --- a/x86_64.c
>> +++ b/x86_64.c
>> @@ -2114,8 +2114,9 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr,
physaddr_t *paddr, in
>> goto no_upage;
>> if (pmd_pte & _PAGE_PSE) {
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx (2MB)\n\n",
>> - PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK);
>> + fprintf(fp, " PAGE: %lx (2MB%s)\n\n",
>> + PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK,
>> + IS_ZEROPAGE(PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK) ?
", ZERO PAGE" : "");
>> x86_64_translate_pte(pmd_pte, 0, 0);
>> }
>> @@ -2143,8 +2144,8 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr,
physaddr_t *paddr, in
>> *paddr = (PAGEBASE(pte) & PHYSICAL_PAGE_MASK) + PAGEOFFSET(uvaddr);
>> if (verbose) {
>> - fprintf(fp, " PAGE: %lx\n\n",
>> - PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK);
>> + fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr) &
PHYSICAL_PAGE_MASK,
>> + IS_ZEROPAGE(PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK) ? "(ZERO
PAGE)" : "");
>> x86_64_translate_pte(pte, 0, 0);
>> }
why only for uvtop_level4?
That because I have no idea about xyz_xen_wpt and similar target. In
addition, I have no environment to test that, so...
How about doing this when someone need it in these environment? Or I can
do it if you think just complete it directly is ok.
Thanks for your time!
-wrw
Thanks,
Kazu