[Crash-utility PATCH] crash: arm: LPAE: fix bugs when translating address
by Liu Hua
For virtual addresses with idmap, we just use VTOP to
do the translation. So we should not use the same method
for both non-LPAE arm and LPAE-enabled arm.
This bug occurs when phys_base exceeds 4G, so
we do not hit it in most cases. But the keystone
platform, whose code has been upstreamed recently,
boots on 2G-4G physical mappings and then
rebuilds its page tables at 16G-18G. So its phys_base
is 16G.
Signed-off-by: Liu Hua <sdu.liu(a)huawei.com>
---
arm.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/arm.c b/arm.c
index 23d9b51..43e3aab 100644
--- a/arm.c
+++ b/arm.c
@@ -1095,6 +1095,18 @@ arm_lpae_vtop(ulong vaddr, ulong *pgd, physaddr_t *paddr, int verbose)
pmd_t pmd_pte;
pte_t pte;
+ if (!vt->vmalloc_start) {
+ *paddr = LPAE_VTOP(vaddr);
+ return TRUE;
+ }
+
+ if (!IS_VMALLOC_ADDR(vaddr)) {
+ *paddr = LPAE_VTOP(vaddr);
+ if (!verbose)
+ return TRUE;
+ }
+
+
if (verbose)
fprintf(fp, "PAGE DIRECTORY: %lx\n", (ulong)pgd);
@@ -1231,6 +1243,11 @@ arm_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose)
if (!IS_KVADDR(kvaddr))
return FALSE;
+ if (machdep->flags & PAE)
+ return arm_lpae_vtop(kvaddr, (ulong *)vt->kernel_pgd[0],
+ paddr, verbose);
+
+
if (!vt->vmalloc_start) {
*paddr = VTOP(kvaddr);
return TRUE;
@@ -1242,9 +1259,6 @@ arm_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose)
return TRUE;
}
- if (machdep->flags & PAE)
- return arm_lpae_vtop(kvaddr, (ulong *)vt->kernel_pgd[0],
- paddr, verbose);
return arm_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose);
}
--
1.9.0
9 years, 11 months
[PATCH] crash: s390x: Add vector register support
by Michael Holzheu
Hello Dave,
This patch adds support for the new s390x vector registers.
For ELF dumps the registers are taken from the VX ELF notes, for
s390 dumps the registers are taken from memory. The kernel stores
a pointer to the save area in the CPU lowcores at offset 0x11b0.
This patch also adds a new -A option to the "bt" command. The
new vector registers are only shown when this option is specified.
This is done because for normal debugging we do not want to
pollute the bt output with the large vector register output (512 bytes).
The following shows an output example:
crash> bt -a -A
PID: 2387 TASK: 1785a5e8 CPU: 0 COMMAND: "bash"
LOWCORE INFO:
-psw : 0x0400d00180000000 0x0000000000112aa0
-function : store_status at 112aa0
-prefix : 0x1fffc000
-cpu timer: 0x7ffffff3 0x0066ef81
-clock cmp: 0x0066ef81 0000000000
-general registers:
000000000000000000 0x0400c00180000000
....
- vector registers:
0x404b000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x404b000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
Signed-off-by: Michael Holzheu <holzheu(a)linux.vnet.ibm.com>
---
defs.h | 1
help.c | 1
kernel.c | 5 ++-
netdump.c | 6 +++
netdump.h | 14 ++++++++
s390x.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 126 insertions(+), 1 deletion(-)
--- a/defs.h
+++ b/defs.h
@@ -4966,6 +4966,7 @@ ulong cpu_map_addr(const char *type);
#define BT_FULL_SYM_SLAB2 (0x400000000000ULL)
#define BT_EFRAME_TARGET (0x800000000000ULL)
#define BT_CPUMASK (0x1000000000000ULL)
+#define BT_SHOW_ALL_REGS (0x2000000000000ULL)
#define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
#define BT_REF_HEXVAL (0x1)
--- a/help.c
+++ b/help.c
@@ -1742,6 +1742,7 @@ char *help_bt[] = {
" trace of the current context will be displayed.\n",
" -a displays the stack traces of the active task on each CPU.",
" (only applicable to crash dumps)",
+" -A displays all available CPU registers.",
" -c cpu display the stack trace of the active task on one or more CPUs,",
" which can be specified using the format \"3\", \"1,8,9\", \"1-23\",",
" or \"1,8,9-14\". (only applicable to crash dumps)",
--- a/kernel.c
+++ b/kernel.c
@@ -2003,12 +2003,15 @@ cmd_bt(void)
if (kt->flags & USE_OLD_BT)
bt->flags |= BT_OLD_BACK_TRACE;
- while ((c = getopt(argcnt, args, "D:fFI:S:c:aloreEgstTdxR:O")) != EOF) {
+ while ((c = getopt(argcnt, args, "D:fFI:S:c:aAloreEgstTdxR:O")) != EOF) {
switch (c)
{
case 'f':
bt->flags |= BT_FULL;
break;
+ case 'A':
+ bt->flags |= BT_SHOW_ALL_REGS;
+ break;
case 'F':
if (bt->flags & BT_FULL_SYM_SLAB)
--- a/netdump.c
+++ b/netdump.c
@@ -2086,6 +2086,12 @@ dump_Elf64_Nhdr(Elf64_Off offset, int st
case NT_S390_PREFIX:
netdump_print("(NT_S390_PREFIX)\n");
break;
+ case NT_S390_VXRS_LOW:
+ netdump_print("(NT_S390_VXRS_LOW)\n");
+ break;
+ case NT_S390_VXRS_HIGH:
+ netdump_print("(NT_S390_VXRS_HIGH)\n");
+ break;
case NT_TASKSTRUCT:
netdump_print("(NT_TASKSTRUCT)\n");
if (STRNEQ(buf, "SNAP"))
--- a/netdump.h
+++ b/netdump.h
@@ -169,6 +169,20 @@ struct xen_kdump_data {
#define NT_S390_PREFIX 0x305
#endif
+/*
+ * S390 vector registers 0-15 upper half note (16 * u64)
+ */
+#ifndef NT_S390_VXRS_LOW
+#define NT_S390_VXRS_LOW 0x309
+#endif
+
+/*
+ * S390 vector registers 16-31 note (16 * u128)
+ */
+#ifndef NT_S390_VXRS_HIGH
+#define NT_S390_VXRS_HIGH 0x30a
+#endif
+
#define MAX_KCORE_ELF_HEADER_SIZE (32768)
struct proc_kcore_data {
--- a/s390x.c
+++ b/s390x.c
@@ -41,6 +41,7 @@
#define KERNEL_STACK_SIZE STACKSIZE() // can be 8192 or 16384
#define LOWCORE_SIZE 8192
+#define VX_SA_SIZE (32 * 16)
#define S390X_PSW_MASK_PSTATE 0x0001000000000000UL
@@ -72,6 +73,11 @@ struct s390x_nt_fpregset {
uint64_t fprs[16];
} __attribute__ ((packed));
+struct s390x_vxrs {
+ uint64_t low;
+ uint64_t high;
+} __attribute__ ((packed));
+
/*
* s390x CPU info
*/
@@ -87,6 +93,8 @@ struct s390x_cpu
uint64_t timer;
uint64_t todcmp;
uint32_t todpreg;
+ uint64_t vxrs_low[16];
+ struct s390x_vxrs vxrs_high[16];
};
/*
@@ -133,6 +141,27 @@ static unsigned long readmem_ul(unsigned
}
/*
+ * Print hex data
+ */
+static void print_hex_buf(void *buf, int len, int cols, char *tag)
+{
+ int j, first = 1;
+
+ for (j = 0; j < len; j += 8) {
+ if (j % (cols * 8) == 0) {
+ if (first)
+ first = 0;
+ else
+ fprintf(fp, "\n");
+ fprintf(fp, "%s", tag);
+ }
+ fprintf(fp, "%#018lx ", *((unsigned long *)(buf + j)));
+ }
+ if (len)
+ fprintf(fp, "\n");
+}
+
+/*
* Initialize member offsets
*/
static void s390x_offsets_init(void)
@@ -271,6 +300,16 @@ static void s390x_elf_nt_prefix_add(stru
memcpy(&cpu->prefix, desc, sizeof(cpu->prefix));
}
+static void s390x_elf_nt_vxrs_low_add(struct s390x_cpu *cpu, void *desc)
+{
+ memcpy(&cpu->vxrs_low, desc, sizeof(cpu->vxrs_low));
+}
+
+static void s390x_elf_nt_vxrs_high_add(struct s390x_cpu *cpu, void *desc)
+{
+ memcpy(&cpu->vxrs_high, desc, sizeof(cpu->vxrs_high));
+}
+
static void *get_elf_note_desc(Elf64_Nhdr *note)
{
void *ptr = note;
@@ -315,6 +354,12 @@ static void s390x_elf_note_add(int elf_c
case NT_S390_PREFIX:
s390x_elf_nt_prefix_add(cpu, desc);
break;
+ case NT_S390_VXRS_LOW:
+ s390x_elf_nt_vxrs_low_add(cpu, desc);
+ break;
+ case NT_S390_VXRS_HIGH:
+ s390x_elf_nt_vxrs_high_add(cpu, desc);
+ break;
}
}
@@ -916,6 +961,59 @@ s390x_get_lowcore(struct bt_info *bt, ch
}
/*
+ * Copy VX registers out of s390x cpu
+ */
+static void vx_copy(void *buf, struct s390x_cpu *s390x_cpu)
+{
+ char *_buf = buf;
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ memcpy(&_buf[i * 16], &s390x_cpu->fprs[i], 8);
+ memcpy(&_buf[i * 16 + 8], &s390x_cpu->vxrs_low[i], 8);
+ }
+ memcpy(&_buf[16 * 16], &s390x_cpu->vxrs_high[0], 16 * 16);
+}
+
+/*
+ * Check if VX registers are available
+ */
+static int has_vx_regs(char *lowcore)
+{
+ unsigned long addr = *((uint64_t *)(lowcore + 0x11b0));
+
+ if (addr == 0 || addr % 1024)
+ return 0;
+ return 1;
+}
+
+/*
+ * Print vector registers for cpu
+ */
+static void
+s390x_print_vx_sa(struct bt_info *bt, char *lc)
+{
+ char vx_sa[VX_SA_SIZE];
+ uint64_t addr;
+
+ if (!(bt->flags & BT_SHOW_ALL_REGS))
+ return;
+ if (!has_vx_regs(lc))
+ return;
+ if (!s390x_cpu_vec) {
+ /* Pointer to save area */
+ addr = *((uint64_t *)(lc + 0x11b0));
+ readmem(addr, KVADDR, vx_sa, sizeof(vx_sa), "vx_sa",
+ FAULT_ON_ERROR);
+ } else {
+ /* Get data from s390x cpu */
+ vx_copy(vx_sa, s390x_cpu_get(bt));
+ }
+ fprintf(fp, " -vector registers:\n");
+ print_hex_buf(vx_sa, sizeof(vx_sa), 2, " ");
+}
+
+/*
* Get stack address for interrupt stack using the pcpu array
*/
static unsigned long get_int_stack_pcpu(char *stack_name, int cpu)
@@ -1180,9 +1278,11 @@ static void s390x_back_trace_cmd(struct
if (psw_flags & S390X_PSW_MASK_PSTATE) {
fprintf(fp,"Task runs in userspace\n");
s390x_print_lowcore(lowcore,bt,0);
+ s390x_print_vx_sa(bt, lowcore);
return;
}
s390x_print_lowcore(lowcore,bt,1);
+ s390x_print_vx_sa(bt, lowcore);
fprintf(fp,"\n");
if (symbol_exists("restart_stack")) {
get_int_stack("restart_stack",
9 years, 11 months
About displaying virtual memory information of exiting task
by qiaonuohan
Hello Dave,
I'd like to discuss the following feature with you and get some advice.
The vm command is used to display virtual memory information of a task. But if the
task is exiting (according to crash, 'tsk->flags & PF_EXITING' is true), vm will
set mm to 0 in get_task_mem_usage(). However, the mm may not have been freed yet, and the mm and
its related virtual memory information are helpful when debugging an exiting task.
I was considering ignoring the IS_EXITING(task) check in get_task_mem_usage(), and
if tsk->mm has been set to NULL but the mm has not been freed (see the following case), then we
can specify the mm manually.
CASE(the code is from kernel):
<cut>
exit_mm()
{
...
tsk->mm = NULL; --> dump after this, and before mmput() freeing mm
...
mmput(mm);
}
<cut>
But I guess that would not seem like a good design to you, so I reconsidered it. What about specifying
an mm to vm, just like a task's pid or address? Then vm can retrieve virtual memory information
from the specified mm directly, and get the owner task from mm->owner.
--
Regards
Qiao Nuohan
9 years, 11 months
[RFC PATCH v3 0/2] Show memory overcommit data in dump_kmeminfo()
by Aaron Tomlin
The first patch changes dump_kmeminfo() to report overcommit information
similar to that displayed under the proc/meminfo file. It may be useful to
indicate memory over commitment abuse, for example with forced vmcores from
system hangs due to shortage of memory. The intended output is as follows:
crash> kmem -i
PAGES TOTAL PERCENTAGE
TOTAL MEM 1965332 7.5 GB ----
FREE 78080 305 MB 3% of TOTAL MEM
USED 1887252 7.2 GB 96% of TOTAL MEM
SHARED 789954 3 GB 40% of TOTAL MEM
BUFFERS 110606 432.1 MB 5% of TOTAL MEM
CACHED 1212645 4.6 GB 61% of TOTAL MEM
SLAB 146563 572.5 MB 7% of TOTAL MEM
TOTAL SWAP 1970175 7.5 GB ----
SWAP USED 5 20 KB 0% of TOTAL SWAP
SWAP FREE 1970170 7.5 GB 99% of TOTAL SWAP
COMMIT LIMIT 2952841 11.3 GB ----
COMMITTED 1150595 4.4 GB 38% of TOTAL LIMIT
The second patch simply removes the mention of dump_zone_page_usage()
availability from kmem's help page.
Tested under 3.16.4-200.fc20.x86_64 only.
Though this should work under RHEL4 (2.6.9-5) and above whereby the
kernel symbol sysctl_overcommit_memory is available.
Changelog:
v2 -> v3:
- Appropriately avoid a divide-by-zero when the "allowed" variable
is correctly set to 0 (e.g. overcommit_ratio is 0 and swap
device is off)
- Initialised the following members of offset_table in vm_init():
hstate_order, hstate_nr_huge_pages, hstate_free_huge_pages and
hstate_name
- percpu_counter_struct_count and atomic_t_counter have
been added to offset_table and each initialised in kernel_init()
Aaron Tomlin (2):
kmem: Show memory commitment data in kmem output
help: Remove mention of dump_zone_page_usage()
defs.h | 2 +
help.c | 40 +++++++-------
kernel.c | 11 ++++
memory.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------
symbols.c | 4 ++
5 files changed, 187 insertions(+), 51 deletions(-)
--
1.9.3
9 years, 11 months
command dis fails to display name of function
by qiaonuohan
Hello Dave,
When using dis command on RHEL5.11, I found a weird thing. Please check the
following execution.
<cut>
crash> dis do_wp_page | grep 591
0xffffffff8001115c <do_wp_page+591>: callq 0xffffffff800623d0 <__sched_text_start>
crash> dis schedule
0xffffffff800623d0 <__sched_text_start>: push %rbp
0xffffffff800623d1 <schedule+1>: mov %rsp,%rbp
...
crash> dis do_wp_page | grep 591
0xffffffff8001115c <do_wp_page+591>: callq 0xffffffff800623d0 <schedule>
crash> dis do_wp_page | grep 591
0xffffffff8001115c <do_wp_page+591>: callq 0xffffffff800623d0 <__sched_text_start>
<cut>
The symbol shown for that address keeps alternating between schedule and __sched_text_start.
After some investigation, I found the following lines in
arch/x86_64/kernel/vmlinux.lds.S
<cut>
SECTIONS
{
...
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
...
<cut>
the SCHED_TEXT is defined like below
<cut>
#define SCHED_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__sched_text_start) = .; \
*(.sched.text) \
VMLINUX_SYMBOL(__sched_text_end) = .;
<cut>
So the symbol __sched_text_start may have the same address as the first function
of *(.sched.text). LOCK_TEXT, KPROBES_TEXT and IRQENTRY_TEXT have the same
problem. The attached patch is used to fix this.
--
Regards
Qiao Nuohan
9 years, 11 months
[patch crash v3 0/2] namespace support for net command
by Vasily Averin
Dear Dave,
could you please take a look at v3 version of my patch set
v3:
- show_net_devices_v3: VALID_MEMBER checks now nsproxy_net_ns
instead of task_struct_nsproxy
v2:
- compilation was fixed
- nsproxy_net_ns moved to end of existing offset_table
- -n key behavior was changed, now it can work without pid/task argument:
"net" without arguments shows information about init_net
"net -n" -- gets net namespace reference from current task
"net -n pid|taskp" -- from specified task
- "net -n" example was added into help
- show_net_devices_v3: fall back to using init_net on kernels without
nsproxy support
- tested; seems to work correctly on a RHEL6-based OpenVZ kernel
Vasily Averin (2):
net -n option renamed to -N
namespace support for net command
defs.h | 1 +
help.c | 14 +++++++++++---
net.c | 58 ++++++++++++++++++++++++++++++++++++++++++----------------
symbols.c | 2 ++
4 files changed, 56 insertions(+), 19 deletions(-)
--
1.9.1
9 years, 11 months
[patch crash v3 2/2] namespace support for net command
by Vasily Averin
net without arguments shows network devices in init_net
net -n -- in network namespace of current task
net -n pid|taskp -- in network namespace of specified task
Signed-off-by: Vasily Averin <vvs(a)openvz.org>
---
defs.h | 1 +
help.c | 10 +++++++++-
net.c | 56 +++++++++++++++++++++++++++++++++++++++++---------------
symbols.c | 2 ++
4 files changed, 53 insertions(+), 16 deletions(-)
diff --git a/defs.h b/defs.h
index dc2d65a..d51936f 100644
--- a/defs.h
+++ b/defs.h
@@ -1923,6 +1923,7 @@ struct offset_table { /* stash of commonly-used offsets */
long kernfs_node_parent;
long kmem_cache_cpu_partial;
long kmem_cache_cpu_cache;
+ long nsproxy_net_ns;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/help.c b/help.c
index 780966b..4f98aa5 100644
--- a/help.c
+++ b/help.c
@@ -6452,7 +6452,7 @@ NULL
char *help_net[] = {
"net",
"network command",
-"[-a] [[-s | -S [-xd]] [-R ref] [pid | taskp]] [-N addr]",
+"[-a] [[-s | -S [-xd]] [-R ref] [-n] [pid | taskp]] [-N addr]",
" Display various network related data:\n",
" -a display the ARP cache.",
" -s display open network socket/sock addresses, their family and type,",
@@ -6465,6 +6465,9 @@ char *help_net[] = {
" -N addr translates an IPv4 address expressed as a decimal or hexadecimal",
" value into a standard numbers-and-dots notation.",
" -R ref socket or sock address, or file descriptor.",
+" For kernels supporting namespaces, the -n option may be used to",
+" display the network devices with respect to the network namespace"
+" of a current task or a task specified by pid or taskp:\n",
" pid a process PID.",
" taskp a hexadecimal task_struct pointer.\n",
" If no arguments are entered, the list of network devices, names and IP",
@@ -6479,6 +6482,11 @@ char *help_net[] = {
" c0249f20 lo 127.0.0.1",
" c7fe6d80 eth0 10.1.8.20",
" ",
+"\n %s> net -n 2618",
+" NET_DEVICE NAME IP ADDRESS(ES)",
+" ffff880456ee7020 lo 127.0.0.1",
+" ffff8804516a1020 eth0 10.1.9.223",
+" ",
" Dump the ARP cache:\n",
" %s> net -a",
" NEIGHBOUR IP ADDRESS HW TYPE HW ADDRESS DEVICE STATE",
diff --git a/net.c b/net.c
index cdd424c..b6d9f25 100644
--- a/net.c
+++ b/net.c
@@ -64,9 +64,9 @@ struct devinfo {
/* bytes needed for <ip address>:<port> notation */
#define BYTES_IP_TUPLE (BYTES_IP_ADDR + BYTES_PORT_NUM + 1)
-static void show_net_devices(void);
-static void show_net_devices_v2(void);
-static void show_net_devices_v3(void);
+static void show_net_devices(ulong);
+static void show_net_devices_v2(ulong);
+static void show_net_devices_v3(ulong);
static void print_neighbour_q(ulong, int);
static void get_netdev_info(ulong, struct devinfo *);
static void get_device_name(ulong, char *);
@@ -137,6 +137,8 @@ net_init(void)
error(WARNING,
"net_init: unknown device type for net device");
}
+ if (VALID_MEMBER(task_struct_nsproxy))
+ MEMBER_OFFSET_INIT(nsproxy_net_ns, "nsproxy", "net_ns");
if (net->flags & NETDEV_INIT) {
MK_TYPE_T(net->dev_name_t, net->netdevice, "name");
@@ -304,7 +306,7 @@ net_init(void)
* The net command...
*/
-#define NETOPTS "N:asSR:xd"
+#define NETOPTS "N:asSR:xdn"
#define s_FLAG FOREACH_s_FLAG
#define S_FLAG FOREACH_S_FLAG
#define x_FLAG FOREACH_x_FLAG
@@ -324,8 +326,10 @@ void
cmd_net(void)
{
int c;
- ulong sflag;
+ ulong sflag, nflag;
ulong value;
+ ulong task;
+ struct task_context *tc = NULL;
struct in_addr in_addr;
struct reference reference, *ref;
@@ -333,7 +337,8 @@ cmd_net(void)
error(FATAL, "net subsystem not initialized!");
ref = NULL;
- sflag = 0;
+ sflag = nflag = 0;
+ task = pid_to_task(0);
while ((c = getopt(argcnt, args, NETOPTS)) != EOF) {
switch (c) {
@@ -387,6 +392,19 @@ cmd_net(void)
sflag |= d_FLAG;
break;
+ case 'n':
+ nflag = 1;
+ task = CURRENT_TASK();
+ if (args[optind]) {
+ switch (str_to_context(args[optind],
+ &value, &tc)) {
+ case STR_PID:
+ case STR_TASK:
+ task = tc->task;
+ }
+ }
+ break;
+
default:
argerrs++;
break;
@@ -399,8 +417,8 @@ cmd_net(void)
if (sflag)
dump_sockets(sflag, ref);
- if (argcnt == 1)
- show_net_devices();
+ if ((argcnt == 1) || nflag)
+ show_net_devices(task);
}
/*
@@ -408,17 +426,17 @@ cmd_net(void)
*/
static void
-show_net_devices(void)
+show_net_devices(ulong task)
{
ulong next;
long flen;
char buf[BUFSIZE];
if (symbol_exists("dev_base_head")) {
- show_net_devices_v2();
+ show_net_devices_v2(task);
return;
} else if (symbol_exists("init_net")) {
- show_net_devices_v3();
+ show_net_devices_v3(task);
return;
}
@@ -452,7 +470,7 @@ show_net_devices(void)
}
static void
-show_net_devices_v2(void)
+show_net_devices_v2(ulong task)
{
struct list_data list_data, *ld;
char *net_device_buf;
@@ -501,8 +519,9 @@ show_net_devices_v2(void)
}
static void
-show_net_devices_v3(void)
+show_net_devices_v3(ulong task)
{
+ ulong nsproxy_p, net_ns_p;
struct list_data list_data, *ld;
char *net_device_buf;
char buf[BUFSIZE];
@@ -523,8 +542,15 @@ show_net_devices_v3(void)
ld = &list_data;
BZERO(ld, sizeof(struct list_data));
ld->flags |= LIST_ALLOCATE;
- ld->start = ld->end =
- symbol_value("init_net") + OFFSET(net_dev_base_head);
+ if (VALID_MEMBER(nsproxy_net_ns)) {
+ readmem(task + OFFSET(task_struct_nsproxy), KVADDR, &nsproxy_p,
+ sizeof(ulong), "task_struct.nsproxy", FAULT_ON_ERROR);
+ if (!readmem(nsproxy_p + OFFSET(nsproxy_net_ns), KVADDR, &net_ns_p,
+ sizeof(ulong), "nsproxy.net_ns", RETURN_ON_ERROR|QUIET))
+ error(FATAL, "cannot determine net_namespace location!\n");
+ } else
+ net_ns_p = symbol_value("init_net");
+ ld->start = ld->end = net_ns_p + OFFSET(net_dev_base_head);
ld->list_head_offset = OFFSET(net_device_dev_list);
ndevcnt = do_list(ld);
diff --git a/symbols.c b/symbols.c
index cebff52..cb642f6 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9295,6 +9295,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(kern_ipc_perm_seq));
fprintf(fp, " nsproxy_ipc_ns: %ld\n",
OFFSET(nsproxy_ipc_ns));
+ fprintf(fp, " nsproxy_net_ns: %ld\n",
+ OFFSET(nsproxy_net_ns));
fprintf(fp, " shmem_inode_info_swapped: %ld\n",
OFFSET(shmem_inode_info_swapped));
fprintf(fp, " shmem_inode_info_vfs_inode: %ld\n",
--
1.9.1
9 years, 11 months
[patch crash v3 1/2] net -n option renamed to -N
by Vasily Averin
The ipcs and mount commands use the -n option to refer to a namespace.
This patch frees up -n so that a similar option can be added to the net command.
Signed-off-by: Vasily Averin <vvs(a)openvz.org>
---
help.c | 6 +++---
net.c | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/help.c b/help.c
index 6aa3e20..780966b 100644
--- a/help.c
+++ b/help.c
@@ -6452,7 +6452,7 @@ NULL
char *help_net[] = {
"net",
"network command",
-"[-a] [[-s | -S [-xd]] [-R ref] [pid | taskp]] [-n addr]",
+"[-a] [[-s | -S [-xd]] [-R ref] [pid | taskp]] [-N addr]",
" Display various network related data:\n",
" -a display the ARP cache.",
" -s display open network socket/sock addresses, their family and type,",
@@ -6462,7 +6462,7 @@ char *help_net[] = {
" of both structures.",
" -x override default output format with hexadecimal format.",
" -d override default output format with decimal format.",
-" -n addr translates an IPv4 address expressed as a decimal or hexadecimal",
+" -N addr translates an IPv4 address expressed as a decimal or hexadecimal",
" value into a standard numbers-and-dots notation.",
" -R ref socket or sock address, or file descriptor.",
" pid a process PID.",
@@ -6531,7 +6531,7 @@ char *help_net[] = {
" ...",
" "
" Translate the rcv_saddr from above into dotted-decimal notation:\n",
-" %s> net -n 1041236234",
+" %s> net -N 1041236234",
" 10.1.16.62",
" ",
" From \"foreach\", find all tasks with references to socket c08ea3cc:\n",
diff --git a/net.c b/net.c
index 93491df..cdd424c 100644
--- a/net.c
+++ b/net.c
@@ -304,7 +304,7 @@ net_init(void)
* The net command...
*/
-#define NETOPTS "n:asSR:xd"
+#define NETOPTS "N:asSR:xd"
#define s_FLAG FOREACH_s_FLAG
#define S_FLAG FOREACH_S_FLAG
#define x_FLAG FOREACH_x_FLAG
@@ -351,7 +351,7 @@ cmd_net(void)
dump_arp();
break;
- case 'n':
+ case 'N':
value = stol(optarg, FAULT_ON_ERROR, NULL);
in_addr.s_addr = (in_addr_t)value;
fprintf(fp, "%s\n", inet_ntoa(in_addr));
--
1.9.1
9 years, 11 months
OpenVZ wishes for crash
by Vasily Averin
Dear Dave,
Could you please advise on the best way to add some OpenVZ-specific
functionality to mainline crash?
Most important tasks for us are:
1) for specified task: get its Container ID.
2) for a specified container: get the list of its processes, IP addresses, and
references to the corresponding structures: mounts/net devices/IPCs and so on.
3) list of running containers:
Container ID, its IP addresses, reference to top parent task (aka init), number of running processes
Can we add some key to "ps" and "foreach" commands?
For example:
"ps -z pid" allows us to get CT Id,
"ps -z CTID" helps to look/calculate number of processes in container
"foreach (ct?) net/ipcs/mount" -- to get container-specific information
Thank you,
Vasily Averin
9 years, 11 months
[patch crash v2 2/2] namespace support for net command
by Vasily Averin
net without arguments shows network devices in init_net
net -n -- in network namespace of current task
net -n pid|taskp -- in network namespace of specified task
Signed-off-by: Vasily Averin <vvs(a)openvz.org>
---
defs.h | 1 +
help.c | 10 +++++++++-
net.c | 56 +++++++++++++++++++++++++++++++++++++++++---------------
symbols.c | 2 ++
4 files changed, 53 insertions(+), 16 deletions(-)
diff --git a/defs.h b/defs.h
index dc2d65a..d51936f 100644
--- a/defs.h
+++ b/defs.h
@@ -1923,6 +1923,7 @@ struct offset_table { /* stash of commonly-used offsets */
long kernfs_node_parent;
long kmem_cache_cpu_partial;
long kmem_cache_cpu_cache;
+ long nsproxy_net_ns;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/help.c b/help.c
index 780966b..4f98aa5 100644
--- a/help.c
+++ b/help.c
@@ -6452,7 +6452,7 @@ NULL
char *help_net[] = {
"net",
"network command",
-"[-a] [[-s | -S [-xd]] [-R ref] [pid | taskp]] [-N addr]",
+"[-a] [[-s | -S [-xd]] [-R ref] [-n] [pid | taskp]] [-N addr]",
" Display various network related data:\n",
" -a display the ARP cache.",
" -s display open network socket/sock addresses, their family and type,",
@@ -6465,6 +6465,9 @@ char *help_net[] = {
" -N addr translates an IPv4 address expressed as a decimal or hexadecimal",
" value into a standard numbers-and-dots notation.",
" -R ref socket or sock address, or file descriptor.",
+" For kernels supporting namespaces, the -n option may be used to",
+" display the network devices with respect to the network namespace"
+" of a current task or a task specified by pid or taskp:\n",
" pid a process PID.",
" taskp a hexadecimal task_struct pointer.\n",
" If no arguments are entered, the list of network devices, names and IP",
@@ -6479,6 +6482,11 @@ char *help_net[] = {
" c0249f20 lo 127.0.0.1",
" c7fe6d80 eth0 10.1.8.20",
" ",
+"\n %s> net -n 2618",
+" NET_DEVICE NAME IP ADDRESS(ES)",
+" ffff880456ee7020 lo 127.0.0.1",
+" ffff8804516a1020 eth0 10.1.9.223",
+" ",
" Dump the ARP cache:\n",
" %s> net -a",
" NEIGHBOUR IP ADDRESS HW TYPE HW ADDRESS DEVICE STATE",
diff --git a/net.c b/net.c
index cdd424c..dd1c7c2 100644
--- a/net.c
+++ b/net.c
@@ -64,9 +64,9 @@ struct devinfo {
/* bytes needed for <ip address>:<port> notation */
#define BYTES_IP_TUPLE (BYTES_IP_ADDR + BYTES_PORT_NUM + 1)
-static void show_net_devices(void);
-static void show_net_devices_v2(void);
-static void show_net_devices_v3(void);
+static void show_net_devices(ulong);
+static void show_net_devices_v2(ulong);
+static void show_net_devices_v3(ulong);
static void print_neighbour_q(ulong, int);
static void get_netdev_info(ulong, struct devinfo *);
static void get_device_name(ulong, char *);
@@ -137,6 +137,8 @@ net_init(void)
error(WARNING,
"net_init: unknown device type for net device");
}
+ if (VALID_MEMBER(task_struct_nsproxy))
+ MEMBER_OFFSET_INIT(nsproxy_net_ns, "nsproxy", "net_ns");
if (net->flags & NETDEV_INIT) {
MK_TYPE_T(net->dev_name_t, net->netdevice, "name");
@@ -304,7 +306,7 @@ net_init(void)
* The net command...
*/
-#define NETOPTS "N:asSR:xd"
+#define NETOPTS "N:asSR:xdn"
#define s_FLAG FOREACH_s_FLAG
#define S_FLAG FOREACH_S_FLAG
#define x_FLAG FOREACH_x_FLAG
@@ -324,8 +326,10 @@ void
cmd_net(void)
{
int c;
- ulong sflag;
+ ulong sflag, nflag;
ulong value;
+ ulong task;
+ struct task_context *tc = NULL;
struct in_addr in_addr;
struct reference reference, *ref;
@@ -333,7 +337,8 @@ cmd_net(void)
error(FATAL, "net subsystem not initialized!");
ref = NULL;
- sflag = 0;
+ sflag = nflag = 0;
+ task = pid_to_task(0);
while ((c = getopt(argcnt, args, NETOPTS)) != EOF) {
switch (c) {
@@ -387,6 +392,19 @@ cmd_net(void)
sflag |= d_FLAG;
break;
+ case 'n':
+ nflag = 1;
+ task = CURRENT_TASK();
+ if (args[optind]) {
+ switch (str_to_context(args[optind],
+ &value, &tc)) {
+ case STR_PID:
+ case STR_TASK:
+ task = tc->task;
+ }
+ }
+ break;
+
default:
argerrs++;
break;
@@ -399,8 +417,8 @@ cmd_net(void)
if (sflag)
dump_sockets(sflag, ref);
- if (argcnt == 1)
- show_net_devices();
+ if ((argcnt == 1) || nflag)
+ show_net_devices(task);
}
/*
@@ -408,17 +426,17 @@ cmd_net(void)
*/
static void
-show_net_devices(void)
+show_net_devices(ulong task)
{
ulong next;
long flen;
char buf[BUFSIZE];
if (symbol_exists("dev_base_head")) {
- show_net_devices_v2();
+ show_net_devices_v2(task);
return;
} else if (symbol_exists("init_net")) {
- show_net_devices_v3();
+ show_net_devices_v3(task);
return;
}
@@ -452,7 +470,7 @@ show_net_devices(void)
}
static void
-show_net_devices_v2(void)
+show_net_devices_v2(ulong task)
{
struct list_data list_data, *ld;
char *net_device_buf;
@@ -501,8 +519,9 @@ show_net_devices_v2(void)
}
static void
-show_net_devices_v3(void)
+show_net_devices_v3(ulong task)
{
+ ulong nsproxy_p, net_ns_p;
struct list_data list_data, *ld;
char *net_device_buf;
char buf[BUFSIZE];
@@ -523,8 +542,15 @@ show_net_devices_v3(void)
ld = &list_data;
BZERO(ld, sizeof(struct list_data));
ld->flags |= LIST_ALLOCATE;
- ld->start = ld->end =
- symbol_value("init_net") + OFFSET(net_dev_base_head);
+ if (VALID_MEMBER(task_struct_nsproxy)) {
+ readmem(task + OFFSET(task_struct_nsproxy), KVADDR, &nsproxy_p,
+ sizeof(ulong), "task_struct.nsproxy", FAULT_ON_ERROR);
+ if (!readmem(nsproxy_p + OFFSET(nsproxy_net_ns), KVADDR, &net_ns_p,
+ sizeof(ulong), "nsproxy.net_ns", RETURN_ON_ERROR|QUIET))
+ error(FATAL, "cannot determine net_namespace location!\n");
+ } else
+ net_ns_p = symbol_value("init_net");
+ ld->start = ld->end = net_ns_p + OFFSET(net_dev_base_head);
ld->list_head_offset = OFFSET(net_device_dev_list);
ndevcnt = do_list(ld);
diff --git a/symbols.c b/symbols.c
index cebff52..cb642f6 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9295,6 +9295,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(kern_ipc_perm_seq));
fprintf(fp, " nsproxy_ipc_ns: %ld\n",
OFFSET(nsproxy_ipc_ns));
+ fprintf(fp, " nsproxy_net_ns: %ld\n",
+ OFFSET(nsproxy_net_ns));
fprintf(fp, " shmem_inode_info_swapped: %ld\n",
OFFSET(shmem_inode_info_swapped));
fprintf(fp, " shmem_inode_info_vfs_inode: %ld\n",
--
1.9.1
9 years, 11 months