Отчет о работе
Я использовал perf mem -t load record "commands"
для профилирования задержки доступа к системной памяти. После этого я запустил perf mem -D report
и получил следующие результаты:
[root@mdtm-server wenji]# perf mem -D report
# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL
2054 2054 0xffffffff811186bf 0x016ffffe8fbffc804b0 49 0x68100842 /lib/modules/3.12.23/build/vmlinux:perf_event_aux_ctx
2054 2054 0xffffffff81321d6e 0xffff880c7fc87d44 7 0x68100142 /lib/modules/3.12.23/build/vmlinux:ghes_copy_tofrom_phys
Что означают "ADDR", "DSRC", "SYMBOL"?
1 ответ
- IP - адрес (счётчик команд, PC) инструкции загрузки / сохранения (load/store);
- SYMBOL - название функции, содержащей эту инструкцию (IP);
- ADDR - виртуальный адрес данных, к которым обращалась инструкция загрузки / сохранения (если не была указана опция
--phys-data
);
- DSRC - "Декодированный источник".
Что касается DSRC: в одном из списков рассылки рекомендовали посмотреть "SDM, том 3b, Таблица 18-41 (Расположение информации о линейных адресах данных в записи PEBS)".
В ядре также есть код, формирующий значение DSRC (dse - значение, полученное от аппаратуры, PEBS; возвращаемое значение типа u64 - dsrc):
http://lxr.free-electrons.com/source/arch/x86/kernel/cpu/perf_event_intel_ds.c?v=4.3#L28
28 union intel_x86_pebs_dse {
29 u64 val;
30 struct {
31 unsigned int ld_dse:4;
32 unsigned int ld_stlb_miss:1;
33 unsigned int ld_locked:1;
34 unsigned int ld_reserved:26;
35 };
36 struct {
37 unsigned int st_l1d_hit:1;
38 unsigned int st_reserved1:3;
39 unsigned int st_stlb_miss:1;
40 unsigned int st_locked:1;
41 unsigned int st_reserved2:26;
42 };
43 };
http://lxr.free-electrons.com/source/arch/x86/kernel/cpu/perf_event_intel_ds.c?v=4.3#L46
46 /*
47 * Map PEBS Load Latency Data Source encodings to generic
48 * memory data source information
49 */
50 #define P(a, b) PERF_MEM_S(a, b)
51 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
52 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
53
54 static const u64 pebs_data_source[] = {
55 P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
56 OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
57 OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
58 OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
59 OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
60 OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
61 OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
62 OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
63 OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
64 OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
65 OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
66 OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
67 OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
68 OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
69 OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
70 OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
71 };
72
73 static u64 precise_store_data(u64 status)
74 {
75 union intel_x86_pebs_dse dse;
76 u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
77
78 dse.val = status;
79
80 /*
81 * bit 4: TLB access
82 * 1 = stored missed 2nd level TLB
83 *
84 * so it either hit the walker or the OS
85 * otherwise hit 2nd level TLB
86 */
87 if (dse.st_stlb_miss)
88 val |= P(TLB, MISS);
89 else
90 val |= P(TLB, HIT);
91
92 /*
93 * bit 0: hit L1 data cache
94 * if not set, then all we know is that
95 * it missed L1D
96 */
97 if (dse.st_l1d_hit)
98 val |= P(LVL, HIT);
99 else
100 val |= P(LVL, MISS);
101
102 /*
103 * bit 5: Locked prefix
104 */
105 if (dse.st_locked)
106 val |= P(LOCK, LOCKED);
107
108 return val;
109 }
Судя по всему, dsrc представляет собой комбинацию значений макросов PERF_MEM_*, упакованных в битовые поля:
http://lxr.free-electrons.com/source/include/uapi/linux/perf_event.h?v=4.3#L878
878 union perf_mem_data_src {
879 __u64 val;
880 struct {
881 __u64 mem_op:5, /* type of opcode */
882 mem_lvl:14, /* memory hierarchy level */
883 mem_snoop:5, /* snoop mode */
884 mem_lock:2, /* lock instr */
885 mem_dtlb:7, /* tlb access */
886 mem_rsvd:31;
887 };
888 };
890 /* type of opcode (load/store/prefetch,code) */
891 #define PERF_MEM_OP_NA 0x01 /* not available */
892 #define PERF_MEM_OP_LOAD 0x02 /* load instruction */
893 #define PERF_MEM_OP_STORE 0x04 /* store instruction */
894 #define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
895 #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
896 #define PERF_MEM_OP_SHIFT 0
897
898 /* memory hierarchy (memory level, hit or miss) */
899 #define PERF_MEM_LVL_NA 0x01 /* not available */
900 #define PERF_MEM_LVL_HIT 0x02 /* hit level */
901 #define PERF_MEM_LVL_MISS 0x04 /* miss level */
902 #define PERF_MEM_LVL_L1 0x08 /* L1 */
903 #define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
904 #define PERF_MEM_LVL_L2 0x20 /* L2 */
905 #define PERF_MEM_LVL_L3 0x40 /* L3 */
906 #define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
907 #define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
908 #define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
909 #define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
910 #define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
911 #define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
912 #define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
913 #define PERF_MEM_LVL_SHIFT 5
914
915 /* snoop mode */
916 #define PERF_MEM_SNOOP_NA 0x01 /* not available */
917 #define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
918 #define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
919 #define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
920 #define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
921 #define PERF_MEM_SNOOP_SHIFT 19
922
923 /* locked instruction */
924 #define PERF_MEM_LOCK_NA 0x01 /* not available */
925 #define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
926 #define PERF_MEM_LOCK_SHIFT 24
927
928 /* TLB access */
929 #define PERF_MEM_TLB_NA 0x01 /* not available */
930 #define PERF_MEM_TLB_HIT 0x02 /* hit level */
931 #define PERF_MEM_TLB_MISS 0x04 /* miss level */
932 #define PERF_MEM_TLB_L1 0x08 /* L1 */
933 #define PERF_MEM_TLB_L2 0x10 /* L2 */
934 #define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
935 #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
936 #define PERF_MEM_TLB_SHIFT 26
937
938 #define PERF_MEM_S(a, s) \
939 (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
940