| 1 | /*************************************************************************** |
|---|
| 2 | * Copyright (c) 2006, Broadcom Corporation |
|---|
| 3 | * All Rights Reserved |
|---|
| 4 | * Confidential Property of Broadcom Corporation |
|---|
| 5 | * |
|---|
| 6 | * THIS SOFTWARE MAY ONLY BE USED SUBJECT TO AN EXECUTED SOFTWARE LICENSE |
|---|
| 7 | * AGREEMENT BETWEEN THE USER AND BROADCOM. YOU HAVE NO RIGHT TO USE OR |
|---|
| 8 | * EXPLOIT THIS MATERIAL EXCEPT SUBJECT TO THE TERMS OF SUCH AN AGREEMENT. |
|---|
| 9 | * |
|---|
| 10 | * $brcm_Workfile: bprofile.c $ |
|---|
| 11 | * $brcm_Revision: 16 $ |
|---|
| 12 | * $brcm_Date: 12/22/06 12:05p $ |
|---|
| 13 | * |
|---|
| 14 | * Module Description: |
|---|
| 15 | * |
|---|
| 16 | * Embeddeble profiler library |
|---|
| 17 | * Report module |
|---|
| 18 | * |
|---|
| 19 | * Revision History: |
|---|
| 20 | * |
|---|
| 21 | * $brcm_Log: /BSEAV/lib/bprofile/bprofile.c $ |
|---|
| 22 | * |
|---|
| 23 | * 16 12/22/06 12:05p vsilyaev |
|---|
| 24 | * PR 25997: Improved printing of performance counter data |
|---|
| 25 | * |
|---|
| 26 | * 15 12/14/06 4:37p vsilyaev |
|---|
| 27 | * PR 25997: More accurate adjusting of profile data |
|---|
| 28 | * |
|---|
| 29 | * 14 12/13/06 7:45p vsilyaev |
|---|
| 30 | * PR 25997: Somehow better use of calibration results |
|---|
| 31 | * |
|---|
| 32 | * 13 12/11/06 6:21p vsilyaev |
|---|
| 33 | * PR 25997: Added mode to eleminate estimated overhead of instrumented |
|---|
| 34 | * code |
|---|
| 35 | * |
|---|
| 36 | * 12 12/11/06 12:46p vsilyaev |
|---|
| 37 | * PR 25997: Modularizes report routine |
|---|
| 38 | * |
|---|
| 39 | * 11 12/9/06 12:04p vsilyaev |
|---|
| 40 | * PR 25997: Improved posprocessing of multi-thread profile data |
|---|
| 41 | * |
|---|
| 42 | * 10 12/8/06 7:23p vsilyaev |
|---|
| 43 | * PR 25997: added option to limit number of output entries |
|---|
| 44 | * |
|---|
| 45 | * 9 12/7/06 2:43p vsilyaev |
|---|
| 46 | * PR 25997: Added fixes for 3.4 GCC compiler |
|---|
| 47 | * |
|---|
| 48 | * 8 12/5/06 4:31p vsilyaev |
|---|
| 49 | * PR 25997: Added MIPS performance counters |
|---|
| 50 | * |
|---|
| 51 | * 7 12/5/06 12:02p vsilyaev |
|---|
| 52 | * PR 25997: Added faster, single threaded, probe routine |
|---|
| 53 | * |
|---|
| 54 | * 6 12/1/06 4:18p vsilyaev |
|---|
| 55 | * PR 25997: Improved time accuracy |
|---|
| 56 | * |
|---|
| 57 | * 5 11/30/06 8:33p vsilyaev |
|---|
| 58 | * PR 25997: Use atomic variable to prevent missing data |
|---|
| 59 | * |
|---|
| 60 | * 4 11/28/06 4:48p vsilyaev |
|---|
| 61 | * PR 25997: Added multithreaded profiling |
|---|
| 62 | * |
|---|
| 63 | * 3 11/27/06 11:55a vsilyaev |
|---|
| 64 | * PR 25997: Removed unused variables |
|---|
| 65 | * |
|---|
| 66 | * 2 11/20/06 4:54p vsilyaev |
|---|
| 67 | * PR 25997: Decoupled profiling and symbol table |
|---|
| 68 | * |
|---|
| 69 | * 1 11/16/06 5:23p vsilyaev |
|---|
| 70 | * PR 25997: Embeddable profiler |
|---|
| 71 | * |
|---|
| 72 | *******************************************************************************/ |
|---|
| 73 | #include "bstd.h" |
|---|
| 74 | #include "bkni.h" |
|---|
| 75 | #include "bprofile.h" |
|---|
| 76 | #include "bsymtable.h" |
|---|
| 77 | #include "bprofile_tick.h" |
|---|
| 78 | |
|---|
| 79 | BDBG_MODULE(bprofile); |
|---|
| 80 | #define BDBG_MSG_TRACE(x) /* BDBG_MSG(x) */ |
|---|
| 81 | |
|---|
| 82 | #if BPROFILE_CFG_SINGLE_THREAD |
|---|
| 83 | #define B_GET_EVENT(e) (((e).addr)&B_PROFILE_EVENT_MASK) |
|---|
| 84 | #define B_GET_ADDR(e) ((e).addr)&(~B_PROFILE_EVENT_MASK) |
|---|
| 85 | #else |
|---|
| 86 | #define B_GET_EVENT(e) (((e).event_0)&B_PROFILE_EVENT_MASK) |
|---|
| 87 | #define B_GET_STACK(e) (((e).event_0)&(~B_PROFILE_EVENT_MASK)) |
|---|
| 88 | #define B_GET_ADDR(e) (e).addr |
|---|
| 89 | #endif |
|---|
| 90 | |
|---|
| 91 | typedef struct bprof_res_entry { |
|---|
| 92 | unsigned addr; |
|---|
| 93 | unsigned count; |
|---|
| 94 | unsigned child_count; /* number of calls into the child functions */ |
|---|
| 95 | bprofile_sample all; |
|---|
| 96 | bprofile_sample self; |
|---|
| 97 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 98 | bprofile_sample preempt; |
|---|
| 99 | #endif |
|---|
| 100 | } bprof_res_entry; |
|---|
| 101 | |
|---|
| 102 | |
|---|
| 103 | typedef struct bprof_call_entry { |
|---|
| 104 | unsigned addr; |
|---|
| 105 | const bprofile_sample *enter; |
|---|
| 106 | const bprofile_sample *child; |
|---|
| 107 | unsigned call_count; /* number of function calls at that level */ |
|---|
| 108 | bprofile_sample all; |
|---|
| 109 | bprofile_sample self; |
|---|
| 110 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 111 | bprofile_sample preempt; |
|---|
| 112 | #endif |
|---|
| 113 | unsigned stack; |
|---|
| 114 | } bprof_call_entry; |
|---|
| 115 | |
|---|
| 116 | #define BPROF_CALL_MAX_STACK 128 |
|---|
| 117 | typedef struct bprof_call_stack { |
|---|
| 118 | unsigned nelem; |
|---|
| 119 | bprof_call_entry stack[BPROF_CALL_MAX_STACK]; |
|---|
| 120 | } bprof_call_stack; |
|---|
| 121 | |
|---|
| 122 | typedef struct bprof_thread_info { |
|---|
| 123 | unsigned stack_max; |
|---|
| 124 | unsigned stack_min; |
|---|
| 125 | const void *thread_key; |
|---|
| 126 | } bprof_thread_info; |
|---|
| 127 | |
|---|
| 128 | #define BPROF_CALL_MAX_THREADS 64 |
|---|
| 129 | typedef struct bprof_threads { |
|---|
| 130 | unsigned nelem; |
|---|
| 131 | bprof_thread_info threads[BPROF_CALL_MAX_THREADS]; |
|---|
| 132 | } bprof_threads; |
|---|
| 133 | |
|---|
| 134 | BSTD_INLINE void |
|---|
| 135 | bprofile_sample_init(bprofile_sample *sample) |
|---|
| 136 | { |
|---|
| 137 | sample->time = 0; |
|---|
| 138 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 139 | { |
|---|
| 140 | unsigned i; |
|---|
| 141 | for(i=0;i<BPROFILE_CFG_PERF_COUNTER;i++) { |
|---|
| 142 | sample->counters[i] = 0; |
|---|
| 143 | } |
|---|
| 144 | } |
|---|
| 145 | #endif |
|---|
| 146 | return; |
|---|
| 147 | } |
|---|
| 148 | |
|---|
| 149 | /* returns ts1-ts2 in modulo arithmetic */ |
|---|
| 150 | BSTD_INLINE unsigned |
|---|
| 151 | b_ts_diff(unsigned ts1, unsigned ts2) |
|---|
| 152 | { |
|---|
| 153 | return ts1-ts2; |
|---|
| 154 | } |
|---|
| 155 | #define B_MAX_TS_DIFF (1u<<31) |
|---|
| 156 | |
|---|
| 157 | BSTD_INLINE void |
|---|
| 158 | bprofile_sample_add_diff(bprofile_sample *acc, const bprofile_sample *a, const bprofile_sample *b) |
|---|
| 159 | { |
|---|
| 160 | unsigned diff = b_ts_diff(a->time, b->time); |
|---|
| 161 | |
|---|
| 162 | if (diff>B_MAX_TS_DIFF) { |
|---|
| 163 | /* ignore out of order entries */ |
|---|
| 164 | return; |
|---|
| 165 | } |
|---|
| 166 | acc->time += diff; |
|---|
| 167 | |
|---|
| 168 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 169 | { |
|---|
| 170 | unsigned i; |
|---|
| 171 | for(i=0;i<BPROFILE_CFG_PERF_COUNTER;i++) { |
|---|
| 172 | acc->counters[i] += bperf_sample_diff(a->counters[i], b->counters[i]) ; |
|---|
| 173 | } |
|---|
| 174 | } |
|---|
| 175 | #endif |
|---|
| 176 | return; |
|---|
| 177 | } |
|---|
| 178 | |
|---|
| 179 | |
|---|
| 180 | BSTD_INLINE void |
|---|
| 181 | bprofile_sample_add(bprofile_sample *acc, const bprofile_sample *a) |
|---|
| 182 | { |
|---|
| 183 | acc->time += a->time; |
|---|
| 184 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 185 | { |
|---|
| 186 | unsigned i; |
|---|
| 187 | for(i=0;i<BPROFILE_CFG_PERF_COUNTER;i++) { |
|---|
| 188 | acc->counters[i] += a->counters[i]; |
|---|
| 189 | } |
|---|
| 190 | } |
|---|
| 191 | #endif |
|---|
| 192 | return; |
|---|
| 193 | } |
|---|
| 194 | |
|---|
| 195 | BSTD_INLINE void |
|---|
| 196 | b_prof_res_entry_init(bprof_res_entry *entry) |
|---|
| 197 | { |
|---|
| 198 | entry->addr = 0; |
|---|
| 199 | entry->count = 0; |
|---|
| 200 | entry->child_count = 0; |
|---|
| 201 | bprofile_sample_init(&entry->all); |
|---|
| 202 | bprofile_sample_init(&entry->self); |
|---|
| 203 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 204 | bprofile_sample_init(&entry->preempt); |
|---|
| 205 | #endif |
|---|
| 206 | } |
|---|
| 207 | |
|---|
| 208 | static B_BIN_SEARCH(b_search_addr, bprof_res_entry, addr) |
|---|
| 209 | |
|---|
| 210 | /* shifts down elements in the table */ |
|---|
| 211 | BSTD_INLINE void |
|---|
| 212 | b_profile_move(bprof_res_entry *table, size_t nentries, unsigned pos) |
|---|
| 213 | { |
|---|
| 214 | unsigned i; |
|---|
| 215 | for(i=nentries;i>pos;i--) { |
|---|
| 216 | table[i] = table[i-1]; |
|---|
| 217 | } |
|---|
| 218 | return; |
|---|
| 219 | } |
|---|
| 220 | |
|---|
| 221 | |
|---|
| 222 | BSTD_INLINE bool |
|---|
| 223 | b_res_validate(const bprof_res_entry *res, size_t size) |
|---|
| 224 | { |
|---|
| 225 | unsigned prev_addr; |
|---|
| 226 | size_t i; |
|---|
| 227 | if (size==0) { |
|---|
| 228 | return true; |
|---|
| 229 | } |
|---|
| 230 | prev_addr = res[0].addr; |
|---|
| 231 | for(i=1;i<size;i++) { |
|---|
| 232 | if(prev_addr>=res[i].addr) { |
|---|
| 233 | for(i=0;i<size;i++) { |
|---|
| 234 | BDBG_ERR(("b_res_validate: %u: %#x", i, res[i].addr)); |
|---|
| 235 | } |
|---|
| 236 | return false; |
|---|
| 237 | } |
|---|
| 238 | } |
|---|
| 239 | return true; |
|---|
| 240 | } |
|---|
| 241 | |
|---|
| 242 | BSTD_INLINE void |
|---|
| 243 | b_profile_entry_swap(bprofile_entry *a, bprofile_entry *b) |
|---|
| 244 | { |
|---|
| 245 | bprofile_entry temp; |
|---|
| 246 | |
|---|
| 247 | temp = *a; |
|---|
| 248 | *a = *b; |
|---|
| 249 | *b = temp; |
|---|
| 250 | return; |
|---|
| 251 | } |
|---|
| 252 | |
|---|
| 253 | BSTD_INLINE void |
|---|
| 254 | b_res_heap_swap(bprof_res_entry *a, bprof_res_entry *b) |
|---|
| 255 | { |
|---|
| 256 | bprof_res_entry temp; |
|---|
| 257 | |
|---|
| 258 | temp = *a; |
|---|
| 259 | *a = *b; |
|---|
| 260 | *b = temp; |
|---|
| 261 | return; |
|---|
| 262 | } |
|---|
| 263 | |
|---|
| 264 | |
|---|
| 265 | #define B_RES_KEY_GT(res1,res2) ((res1)->self.time > (res2)->self.time) |
|---|
| 266 | /* #define B_RES_KEY_GT(res1,res2) ((res1)->all.time > (res2)->all.time) */ |
|---|
| 267 | |
|---|
| 268 | static void |
|---|
| 269 | b_res_heap_sift(bprof_res_entry *res, int start, int count) |
|---|
| 270 | { |
|---|
| 271 | int root = start; |
|---|
| 272 | int child; |
|---|
| 273 | |
|---|
| 274 | while( root*2 + 1 < count) { |
|---|
| 275 | child = root*2 + 1; |
|---|
| 276 | if (child < count -1 && B_RES_KEY_GT(&res[child], &res[child+1])) { |
|---|
| 277 | child++; |
|---|
| 278 | } |
|---|
| 279 | if (B_RES_KEY_GT(&res[root], &res[child])) { |
|---|
| 280 | b_res_heap_swap(&res[root], &res[child]); |
|---|
| 281 | root = child; |
|---|
| 282 | } else { |
|---|
| 283 | return; |
|---|
| 284 | } |
|---|
| 285 | } |
|---|
| 286 | return; |
|---|
| 287 | } |
|---|
| 288 | |
|---|
| 289 | static void |
|---|
| 290 | b_res_heap_sort(bprof_res_entry *res, size_t size) |
|---|
| 291 | { |
|---|
| 292 | int i; |
|---|
| 293 | |
|---|
| 294 | for(i=(size/2)-1;i>=0;i--) { |
|---|
| 295 | b_res_heap_sift(res, i, size); |
|---|
| 296 | } |
|---|
| 297 | for(i=size-1;i>0;i--) { |
|---|
| 298 | b_res_heap_swap(&res[i], &res[0]); |
|---|
| 299 | b_res_heap_sift(res, 0, i); |
|---|
| 300 | } |
|---|
| 301 | return; |
|---|
| 302 | } |
|---|
| 303 | |
|---|
| 304 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 305 | |
|---|
| 306 | static B_BIN_SEARCH(b_search_thread, bprof_thread_info, stack_max) |
|---|
| 307 | |
|---|
| 308 | BSTD_INLINE bool |
|---|
| 309 | b_thread_validate(const bprof_threads *threads) |
|---|
| 310 | { |
|---|
| 311 | unsigned i; |
|---|
| 312 | |
|---|
| 313 | for(i=1;i<threads->nelem;i++) { |
|---|
| 314 | if(threads->threads[i].stack_max < threads->threads[i].stack_min) { |
|---|
| 315 | BDBG_ERR(("%u: %#x %#x", i, threads->threads[i].stack_min, threads->threads[i].stack_max)); |
|---|
| 316 | return false; |
|---|
| 317 | } |
|---|
| 318 | if(threads->threads[i-1].stack_max < threads->threads[i-1].stack_max) { |
|---|
| 319 | BDBG_ERR(("%u: %#x %#x", i, threads->threads[i-1].stack_max, threads->threads[i].stack_max)); |
|---|
| 320 | return false; |
|---|
| 321 | } |
|---|
| 322 | } |
|---|
| 323 | return true; |
|---|
| 324 | } |
|---|
| 325 | |
|---|
| 326 | BSTD_INLINE void |
|---|
| 327 | b_profile_thread_entry(bprof_threads *threads, const bprofile_sys_iface *sys_iface, const bprofile_entry *entry) |
|---|
| 328 | { |
|---|
| 329 | int pos; |
|---|
| 330 | const void *key; |
|---|
| 331 | unsigned i; |
|---|
| 332 | unsigned stack = B_GET_STACK(*entry); |
|---|
| 333 | |
|---|
| 334 | pos = b_search_thread(threads->threads, threads->nelem, stack); |
|---|
| 335 | if (pos > 0) { |
|---|
| 336 | return; |
|---|
| 337 | } |
|---|
| 338 | if (pos<0) { |
|---|
| 339 | pos = -(pos+1); |
|---|
| 340 | BDBG_ASSERT(pos>=0); |
|---|
| 341 | } |
|---|
| 342 | if((unsigned)pos<threads->nelem && threads->threads[pos].stack_min <= stack && stack <= threads->threads[pos].stack_max) { |
|---|
| 343 | return; |
|---|
| 344 | } |
|---|
| 345 | /* else search with external function */ |
|---|
| 346 | key = sys_iface->thread_from_stack((uint32_t*)stack); |
|---|
| 347 | if (!key) { |
|---|
| 348 | char buf[16]; |
|---|
| 349 | BDBG_WRN(("unknown thread for stack %#x[%s]", stack, sys_iface->get_name(entry->addr, buf, sizeof(buf)))); |
|---|
| 350 | return; /* invalid stack entry */ |
|---|
| 351 | } |
|---|
| 352 | for(i=0;i<threads->nelem;i++) { |
|---|
| 353 | if(threads->threads[i].thread_key == key) { |
|---|
| 354 | /* bingo found key update entry now */ |
|---|
| 355 | if (stack < threads->threads[i].stack_min) { |
|---|
| 356 | threads->threads[i].stack_min = stack; |
|---|
| 357 | } else if (stack > threads->threads[i].stack_max) { |
|---|
| 358 | threads->threads[i].stack_max = stack; |
|---|
| 359 | } else { |
|---|
| 360 | BDBG_ASSERT(false); /* shall have found it before */ |
|---|
| 361 | } |
|---|
| 362 | BDBG_ASSERT(b_thread_validate(threads)); |
|---|
| 363 | return; |
|---|
| 364 | } |
|---|
| 365 | } |
|---|
| 366 | if (threads->nelem>=BPROF_CALL_MAX_THREADS) { |
|---|
| 367 | /* reached max number of threads */ |
|---|
| 368 | return; |
|---|
| 369 | } |
|---|
| 370 | for(i=threads->nelem;i>(unsigned)pos;i--) { |
|---|
| 371 | threads->threads[i] = threads->threads[i-1]; |
|---|
| 372 | } |
|---|
| 373 | threads->threads[pos].stack_max = stack; |
|---|
| 374 | threads->threads[pos].stack_min = stack; |
|---|
| 375 | threads->threads[pos].thread_key = key; |
|---|
| 376 | threads->nelem++; |
|---|
| 377 | BDBG_ASSERT(b_thread_validate(threads)); |
|---|
| 378 | return; |
|---|
| 379 | } |
|---|
| 380 | #endif |
|---|
| 381 | |
|---|
| 382 | static void |
|---|
| 383 | b_prof_perf_cnt_head_1(void) |
|---|
| 384 | { |
|---|
| 385 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 386 | unsigned i; |
|---|
| 387 | const bperf_counter_mode *mode = bperf_get_mode(); |
|---|
| 388 | |
|---|
| 389 | if (!mode) { |
|---|
| 390 | return; |
|---|
| 391 | } |
|---|
| 392 | for(i=0;i<BPROFILE_CFG_PERF_COUNTER;i++) { |
|---|
| 393 | if (i==0 && mode==&bperf_counter_instructions) { |
|---|
| 394 | BKNI_Printf("%8s ", "issue rate"); |
|---|
| 395 | } else { |
|---|
| 396 | BKNI_Printf("%16s ", mode->counter_names[i]); |
|---|
| 397 | } |
|---|
| 398 | } |
|---|
| 399 | #endif |
|---|
| 400 | } |
|---|
| 401 | static void |
|---|
| 402 | b_prof_perf_cnt_head_2(void) |
|---|
| 403 | { |
|---|
| 404 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 405 | unsigned i; |
|---|
| 406 | const bperf_counter_mode *mode = bperf_get_mode(); |
|---|
| 407 | |
|---|
| 408 | if (!mode) { |
|---|
| 409 | return; |
|---|
| 410 | } |
|---|
| 411 | for(i=0;i<BPROFILE_CFG_PERF_COUNTER;i++) { |
|---|
| 412 | if (i==0 && mode==&bperf_counter_instructions) { |
|---|
| 413 | BKNI_Printf("%8s ", "%"); |
|---|
| 414 | } else { |
|---|
| 415 | BKNI_Printf("%16s ", "total/function"); |
|---|
| 416 | } |
|---|
| 417 | } |
|---|
| 418 | #endif |
|---|
| 419 | } |
|---|
| 420 | |
|---|
| 421 | BSTD_INLINE void |
|---|
| 422 | b_prof_perf_cnt_line(const bprof_res_entry *res) |
|---|
| 423 | { |
|---|
| 424 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 425 | unsigned i; |
|---|
| 426 | const bperf_counter_mode *mode = bperf_get_mode(); |
|---|
| 427 | |
|---|
| 428 | BDBG_ASSERT(res); |
|---|
| 429 | if (!mode) { |
|---|
| 430 | return; |
|---|
| 431 | } |
|---|
| 432 | for(i=0;i<BPROFILE_CFG_PERF_COUNTER;i++) { |
|---|
| 433 | unsigned rate; |
|---|
| 434 | const bprofile_sample *sample=&res->self; |
|---|
| 435 | |
|---|
| 436 | if (i==0 && mode==&bperf_counter_instructions) { |
|---|
| 437 | rate = sample->time/1000; |
|---|
| 438 | if(rate>0) { |
|---|
| 439 | rate = sample->counters[i]/rate; |
|---|
| 440 | } |
|---|
| 441 | BKNI_Printf("%6u.%1u ", rate/10, rate%10); |
|---|
| 442 | continue; |
|---|
| 443 | } |
|---|
| 444 | if (res->count>0) { |
|---|
| 445 | rate = (10*sample->counters[i])/res->count; |
|---|
| 446 | } else { |
|---|
| 447 | rate = 0; |
|---|
| 448 | } |
|---|
| 449 | BKNI_Printf("%9u/%5u.%1u ", sample->counters[i], rate/10, rate%10); |
|---|
| 450 | } |
|---|
| 451 | #endif |
|---|
| 452 | BSTD_UNUSED(res); |
|---|
| 453 | } |
|---|
| 454 | |
|---|
| 455 | static int |
|---|
| 456 | b_prof_get_pos(bprof_res_entry *res, size_t *size, unsigned cur_addr, const bprofile_sys_iface *sys_iface) |
|---|
| 457 | { |
|---|
| 458 | int pos = b_search_addr(res, *size, cur_addr); |
|---|
| 459 | /* BDBG_WRN(("i=%u size=%u pos=%d addr=%#x", i, size, pos, table[i].addr)); */ |
|---|
| 460 | if (pos<0) { |
|---|
| 461 | /* BDBG_WRN(("insert")); */ |
|---|
| 462 | pos = -(pos+1); |
|---|
| 463 | BDBG_ASSERT(pos>=0); |
|---|
| 464 | if (*size>=sys_iface->maxentries) { |
|---|
| 465 | BDBG_WRN(("function table overflowed (%u) %#x", sys_iface->maxentries, cur_addr)); |
|---|
| 466 | return -1; /* skip this entry */ |
|---|
| 467 | } |
|---|
| 468 | b_profile_move(res, *size, pos); |
|---|
| 469 | b_prof_res_entry_init(&res[pos]); |
|---|
| 470 | res[pos].addr = cur_addr; |
|---|
| 471 | (*size)++; |
|---|
| 472 | BDBG_ASSERT(b_res_validate(res, *size)); |
|---|
| 473 | } |
|---|
| 474 | BDBG_ASSERT(res[pos].addr == cur_addr); |
|---|
| 475 | return pos; |
|---|
| 476 | } |
|---|
| 477 | |
|---|
| 478 | BSTD_INLINE bool |
|---|
| 479 | b_thread_match(const bprof_thread_info *thread, unsigned cur_stack) |
|---|
| 480 | { |
|---|
| 481 | return cur_stack >= thread->stack_min && cur_stack <= thread->stack_max; |
|---|
| 482 | } |
|---|
| 483 | |
|---|
| 484 | typedef struct b_profile_cntx { |
|---|
| 485 | bprofile_entry *table; |
|---|
| 486 | size_t nentries; |
|---|
| 487 | const bprofile_sys_iface *sys_iface; |
|---|
| 488 | bprof_call_stack call_stack; |
|---|
| 489 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 490 | bprof_threads threads; |
|---|
| 491 | #endif |
|---|
| 492 | unsigned total_time; |
|---|
| 493 | char buf[32]; |
|---|
| 494 | char buf2[32]; |
|---|
| 495 | } b_profile_cntx; |
|---|
| 496 | typedef size_t (*b_report_method)(b_profile_cntx *cntx); |
|---|
| 497 | |
|---|
| 498 | |
|---|
| 499 | static b_profile_cntx * |
|---|
| 500 | b_profile_cntx_init( bprofile_entry *table, size_t nentries, const bprofile_sys_iface *sys_iface) |
|---|
| 501 | { |
|---|
| 502 | b_profile_cntx *cntx = BKNI_Malloc(sizeof(*cntx)); |
|---|
| 503 | if (!cntx) { |
|---|
| 504 | return NULL; |
|---|
| 505 | } |
|---|
| 506 | cntx->table = table; |
|---|
| 507 | cntx->nentries = nentries; |
|---|
| 508 | cntx->sys_iface = sys_iface; |
|---|
| 509 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 510 | cntx->threads.nelem = 0; |
|---|
| 511 | #endif |
|---|
| 512 | return cntx; |
|---|
| 513 | } |
|---|
| 514 | |
|---|
| 515 | static void |
|---|
| 516 | b_profile_cntx_shutdown(b_profile_cntx *cntx) |
|---|
| 517 | { |
|---|
| 518 | BKNI_Free(cntx); |
|---|
| 519 | } |
|---|
| 520 | |
|---|
| 521 | static size_t |
|---|
| 522 | b_profile_report(b_report_method report, bprofile_entry *table, size_t nentries, const bprofile_sys_iface *sys_iface) |
|---|
| 523 | { |
|---|
| 524 | b_profile_cntx *cntx; |
|---|
| 525 | size_t result=0; |
|---|
| 526 | unsigned diff; |
|---|
| 527 | size_t i; |
|---|
| 528 | unsigned total_time; |
|---|
| 529 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 530 | size_t j; |
|---|
| 531 | const bprof_thread_info *thread; |
|---|
| 532 | bprof_threads *threads; |
|---|
| 533 | #endif |
|---|
| 534 | |
|---|
| 535 | BDBG_ASSERT(sys_iface); |
|---|
| 536 | BDBG_ASSERT(table); |
|---|
| 537 | |
|---|
| 538 | BDBG_MSG(("profile report from %u samples. Options: %u entries limit, %u entries per report %s %s %s %s", nentries, sys_iface->maxentries, sys_iface->split_threads, sys_iface->split_threads?"'split threads'":"", sys_iface->substract_overhead ?"'substract overhead'":"", sys_iface->call_count?"'call count'":"", sys_iface->preempt_time?"'preempt time'":"")); |
|---|
| 539 | if(nentries<=0) { |
|---|
| 540 | BKNI_Printf("no profiler data\n"); |
|---|
| 541 | goto no_data; |
|---|
| 542 | } |
|---|
| 543 | cntx = b_profile_cntx_init(table, nentries, sys_iface); |
|---|
| 544 | if (!cntx) { |
|---|
| 545 | goto err_cntx; |
|---|
| 546 | } |
|---|
| 547 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 548 | threads = &cntx->threads; |
|---|
| 549 | /* 1. Allocate thread entries */ |
|---|
| 550 | for(i=0;i<nentries;i++) { |
|---|
| 551 | b_profile_thread_entry(threads, sys_iface, &table[i]); |
|---|
| 552 | } |
|---|
| 553 | /* 2. Locate out of order entries and fix them */ |
|---|
| 554 | for(thread=&threads->threads[0];thread<&threads->threads[threads->nelem];thread++) { |
|---|
| 555 | /* first locate first entry that belongs to the current thread */ |
|---|
| 556 | for(i=0;i<nentries;i++) { |
|---|
| 557 | if (b_thread_match(thread, B_GET_STACK(table[i]))) { |
|---|
| 558 | break; |
|---|
| 559 | } |
|---|
| 560 | } |
|---|
| 561 | for(;i<nentries;) { |
|---|
| 562 | if (b_thread_match(thread, B_GET_STACK(table[i]))) { |
|---|
| 563 | i++; /* advance to the next entry */ |
|---|
| 564 | continue; |
|---|
| 565 | } |
|---|
| 566 | /* 'i' is entry from the another thread, and 'i-1' shall point to the current thread */ |
|---|
| 567 | BDBG_ASSERT(i>0); |
|---|
| 568 | BDBG_ASSERT(b_thread_match(thread, B_GET_STACK(table[i-1]))); |
|---|
| 569 | diff=b_ts_diff(table[i].sample.time, table[i-1].sample.time); |
|---|
| 570 | if (diff>B_MAX_TS_DIFF) { |
|---|
| 571 | /* out of order sample, move last entry of the current thread all way to the next entry for the current thread */ |
|---|
| 572 | BDBG_MSG_TRACE(("reordering start %u [%u]%s->[%u]%s", i, table[i-1].sample.time, sys_iface->get_name(table[i-1].addr, buf, sizeof(buf)), table[i].sample.time, sys_iface->get_name(table[i].addr, buf2, sizeof(buf2)))); |
|---|
| 573 | j=i; |
|---|
| 574 | for(;i<nentries;i++) { |
|---|
| 575 | if (b_thread_match(thread, B_GET_STACK(table[i]))) { |
|---|
| 576 | break; |
|---|
| 577 | } |
|---|
| 578 | b_profile_entry_swap(&table[i], &table[i-1]); /* push current entry forward */ |
|---|
| 579 | } |
|---|
| 580 | BDBG_MSG_TRACE(("reordering stop %u [%u]%s->[%u]%s", i, table[i-1].sample.time, sys_iface->get_name(table[i-1].addr, buf, sizeof(buf)), table[i].sample.time, sys_iface->get_name(table[i].addr, buf2, sizeof(buf2)))); |
|---|
| 581 | BDBG_MSG_TRACE(("moved %u[%u..%u] out of order thread entries", i-j,j,i)); |
|---|
| 582 | } else { |
|---|
| 583 | /* no discontinuity, just skip entries from other threads */ |
|---|
| 584 | for(;i<nentries;i++) { |
|---|
| 585 | if (b_thread_match(thread, B_GET_STACK(table[i]))) { |
|---|
| 586 | break; |
|---|
| 587 | } |
|---|
| 588 | } |
|---|
| 589 | } |
|---|
| 590 | } |
|---|
| 591 | } |
|---|
| 592 | #endif |
|---|
| 593 | for(total_time=0,i=1;i<nentries;i++) { |
|---|
| 594 | diff=b_ts_diff(table[i].sample.time, table[i-1].sample.time); |
|---|
| 595 | if (diff>B_MAX_TS_DIFF) { |
|---|
| 596 | BDBG_WRN(("out of order entry %u [%u]%s->[%u]%s", i, table[i-1].sample.time, sys_iface->get_name(table[i-1].addr, cntx->buf, sizeof(cntx->buf)), table[i].sample.time, sys_iface->get_name(table[i].addr, cntx->buf2, sizeof(cntx->buf2)))); |
|---|
| 597 | continue; |
|---|
| 598 | } |
|---|
| 599 | total_time+=diff; |
|---|
| 600 | } |
|---|
| 601 | cntx->total_time = total_time; |
|---|
| 602 | result = report(cntx); |
|---|
| 603 | |
|---|
| 604 | b_profile_cntx_shutdown(cntx); |
|---|
| 605 | err_cntx: |
|---|
| 606 | no_data: |
|---|
| 607 | return result; |
|---|
| 608 | } |
|---|
| 609 | |
|---|
| 610 | BSTD_INLINE unsigned |
|---|
| 611 | b_profile_adjust_time(unsigned time, unsigned overhead) |
|---|
| 612 | { |
|---|
| 613 | if (time>overhead) { |
|---|
| 614 | return time-overhead; |
|---|
| 615 | } else { |
|---|
| 616 | return 0; |
|---|
| 617 | } |
|---|
| 618 | } |
|---|
| 619 | |
|---|
| 620 | static void |
|---|
| 621 | b_profile_report_flat_print(b_profile_cntx *cntx, bprof_res_entry *res, size_t nelem, unsigned total_time) |
|---|
| 622 | { |
|---|
| 623 | const bprofile_sys_iface *sys_iface=cntx->sys_iface; |
|---|
| 624 | size_t i; |
|---|
| 625 | b_tick2ms_init(); |
|---|
| 626 | |
|---|
| 627 | if(sys_iface->substract_overhead) { |
|---|
| 628 | bprofile_probe_info probe_info; |
|---|
| 629 | |
|---|
| 630 | bprofile_get_info(&probe_info); |
|---|
| 631 | |
|---|
| 632 | for(i=0;i<nelem;i++) { |
|---|
| 633 | bprof_res_entry *cur = &res[i]; |
|---|
| 634 | unsigned overhead = (probe_info.overhead.time * cur->count)/2; |
|---|
| 635 | if (overhead > cur->self.time) { |
|---|
| 636 | BDBG_MSG(("overhead estimation: %s %ux(%u) %u", sys_iface->get_name(res[i].addr, cntx->buf, sizeof(cntx->buf)), res[i].count, overhead, res[i].self.time)); |
|---|
| 637 | } |
|---|
| 638 | cur->self.time = b_profile_adjust_time(res[i].self.time, overhead); /* self time takes only one penalty */ |
|---|
| 639 | overhead += probe_info.overhead.time*cur->child_count; /* all time has additional parent's penalty for each call to a children */ |
|---|
| 640 | cur->all.time = b_profile_adjust_time(cur->all.time, overhead); |
|---|
| 641 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 642 | { |
|---|
| 643 | unsigned j; |
|---|
| 644 | for(j=0;j<BPROFILE_CFG_PERF_COUNTER;j++) { |
|---|
| 645 | |
|---|
| 646 | overhead = (probe_info.overhead.counters[j]* cur->count)/2; |
|---|
| 647 | cur->self.counters[j] = b_profile_adjust_time(cur->self.counters[j], overhead); /* self takes only one penalty */ |
|---|
| 648 | overhead += probe_info.overhead.counters[j]*cur->child_count; /* all has additional parent's penalty for each call to a child */ |
|---|
| 649 | cur->all.counters[j] = b_profile_adjust_time(cur->all.counters[j], overhead); |
|---|
| 650 | } |
|---|
| 651 | } |
|---|
| 652 | #endif |
|---|
| 653 | } |
|---|
| 654 | } |
|---|
| 655 | |
|---|
| 656 | /* sort table based on time */ |
|---|
| 657 | b_res_heap_sort(res, nelem); |
|---|
| 658 | total_time /= 1000; /* scale to make percentage */ |
|---|
| 659 | if(total_time==0) { |
|---|
| 660 | total_time = 1; |
|---|
| 661 | } |
|---|
| 662 | BKNI_Printf("%5c %10s %8s ", '%', "cumulative", "self"); |
|---|
| 663 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 664 | if(sys_iface->preempt_time) { |
|---|
| 665 | BKNI_Printf("%8s ", "preempt"); |
|---|
| 666 | } |
|---|
| 667 | #endif |
|---|
| 668 | b_prof_perf_cnt_head_1(); |
|---|
| 669 | if(sys_iface->call_count) { |
|---|
| 670 | BKNI_Printf("%8s ", "function"); |
|---|
| 671 | } |
|---|
| 672 | BKNI_Printf("%8s %s\n", "", ""); |
|---|
| 673 | BKNI_Printf("%5s %10s %8s ", "time", "msec", "msec"); |
|---|
| 674 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 675 | if(sys_iface->preempt_time) { |
|---|
| 676 | BKNI_Printf("%8s ", "msec"); |
|---|
| 677 | } |
|---|
| 678 | #endif |
|---|
| 679 | b_prof_perf_cnt_head_2(); |
|---|
| 680 | if(sys_iface->call_count) { |
|---|
| 681 | BKNI_Printf("%8s ", "calls"); |
|---|
| 682 | } |
|---|
| 683 | BKNI_Printf("%8s %s\n", "calls", "name"); |
|---|
| 684 | for(i=0;i<nelem;i++) { |
|---|
| 685 | unsigned time_all = b_tick2_100us(res[i].all.time); |
|---|
| 686 | unsigned time_self = b_tick2_100us(res[i].self.time); |
|---|
| 687 | unsigned percent = res[i].self.time/total_time; |
|---|
| 688 | #define B_100US_FORMAT(n) "%"#n"u.%1u" |
|---|
| 689 | #define B_100US_DATA(v) v/10, v%10 |
|---|
| 690 | if (i>sys_iface->show_entries) { |
|---|
| 691 | break; |
|---|
| 692 | } |
|---|
| 693 | |
|---|
| 694 | if (sys_iface->comma_delimited) |
|---|
| 695 | { |
|---|
| 696 | BKNI_Printf("%u.%u,%u.%u,%u.%u", percent/10, percent%10, B_100US_DATA(time_all), B_100US_DATA(time_self)); |
|---|
| 697 | } |
|---|
| 698 | else |
|---|
| 699 | { |
|---|
| 700 | BKNI_Printf("%3u.%1u" B_100US_FORMAT(8) " " B_100US_FORMAT(6) " " , percent/10, percent%10, B_100US_DATA(time_all), B_100US_DATA(time_self)); |
|---|
| 701 | } |
|---|
| 702 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 703 | if(sys_iface->preempt_time) { |
|---|
| 704 | unsigned time_preempt = b_tick2_100us(res[i].preempt.time); |
|---|
| 705 | if (sys_iface->comma_delimited) |
|---|
| 706 | { |
|---|
| 707 | BKNI_Printf(",%u.%u", B_100US_DATA(time_preempt)); |
|---|
| 708 | } |
|---|
| 709 | else |
|---|
| 710 | { |
|---|
| 711 | BKNI_Printf(B_100US_FORMAT(6) " " , B_100US_DATA(time_preempt)); |
|---|
| 712 | } |
|---|
| 713 | } |
|---|
| 714 | #endif |
|---|
| 715 | b_prof_perf_cnt_line(&res[i]); |
|---|
| 716 | if(sys_iface->call_count) { |
|---|
| 717 | if (sys_iface->comma_delimited) |
|---|
| 718 | { |
|---|
| 719 | BKNI_Printf(",%u", res[i].child_count); |
|---|
| 720 | } |
|---|
| 721 | else |
|---|
| 722 | { |
|---|
| 723 | BKNI_Printf("%8u ", res[i].child_count); |
|---|
| 724 | } |
|---|
| 725 | } |
|---|
| 726 | |
|---|
| 727 | if (sys_iface->comma_delimited) |
|---|
| 728 | { |
|---|
| 729 | BKNI_Printf(",%u,%s\n", res[i].count, sys_iface->get_name(res[i].addr, cntx->buf, sizeof(cntx->buf))); |
|---|
| 730 | } |
|---|
| 731 | else |
|---|
| 732 | { |
|---|
| 733 | BKNI_Printf("%8u %s\n", res[i].count, sys_iface->get_name(res[i].addr, cntx->buf, sizeof(cntx->buf))); |
|---|
| 734 | } |
|---|
| 735 | } |
|---|
| 736 | |
|---|
| 737 | } |
|---|
| 738 | |
|---|
| 739 | static size_t |
|---|
| 740 | b_profile_report_flat(b_profile_cntx *cntx) |
|---|
| 741 | { |
|---|
| 742 | const bprofile_sys_iface *sys_iface=cntx->sys_iface; |
|---|
| 743 | bprof_res_entry *res = BKNI_Malloc(sizeof(*res)*sys_iface->maxentries); |
|---|
| 744 | size_t i; |
|---|
| 745 | bprof_call_stack *call_stack = &cntx->call_stack; |
|---|
| 746 | size_t nentries = cntx->nentries; |
|---|
| 747 | const bprofile_entry *table = cntx->table; |
|---|
| 748 | const bprofile_sample *cur_sample; |
|---|
| 749 | bprof_call_entry *cur; |
|---|
| 750 | int pos; |
|---|
| 751 | size_t size=0; |
|---|
| 752 | const bprof_thread_info *thread; |
|---|
| 753 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 754 | size_t j; |
|---|
| 755 | const bprof_threads *threads = &cntx->threads; |
|---|
| 756 | #endif |
|---|
| 757 | b_tick2ms_init(); |
|---|
| 758 | |
|---|
| 759 | |
|---|
| 760 | BKNI_Printf("total %u msec\n", b_tick2ms(cntx->total_time)); |
|---|
| 761 | #if BPROFILE_CFG_SINGLE_THREAD |
|---|
| 762 | BSTD_UNUSED(thread); |
|---|
| 763 | { |
|---|
| 764 | #else |
|---|
| 765 | for(thread=&threads->threads[0];thread<&threads->threads[threads->nelem];thread++) { |
|---|
| 766 | unsigned thread_time = 0; |
|---|
| 767 | unsigned diff; |
|---|
| 768 | unsigned prev_time; |
|---|
| 769 | BDBG_MSG(("[%#x] %#x:%#x %s", thread, thread->stack_min, thread->stack_max, sys_iface->thread_name(thread->thread_key))); |
|---|
| 770 | #endif |
|---|
| 771 | /* this loop keeps result table sorted by function address, this achieves |
|---|
| 772 | * O(log2(N)) lookup complexivity */ |
|---|
| 773 | call_stack->nelem = 0; |
|---|
| 774 | i=0; |
|---|
| 775 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 776 | /* skip other threads */ |
|---|
| 777 | for(;i<nentries;i++) { |
|---|
| 778 | if (b_thread_match(thread, B_GET_STACK(table[i]))) { |
|---|
| 779 | break; |
|---|
| 780 | } |
|---|
| 781 | } |
|---|
| 782 | prev_time = table[i].sample.time; |
|---|
| 783 | #endif |
|---|
| 784 | for(;i<nentries;i++) { |
|---|
| 785 | bprof_call_entry *parent; |
|---|
| 786 | unsigned cur_addr; |
|---|
| 787 | |
|---|
| 788 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 789 | /* BDBG_MSG(("entry %u %s", i, sys_iface->get_name(table[i].addr, buf, sizeof(buf)))); */ |
|---|
| 790 | diff=b_ts_diff(table[i].sample.time, prev_time); |
|---|
| 791 | if (diff<B_MAX_TS_DIFF) { |
|---|
| 792 | thread_time+=diff; |
|---|
| 793 | } |
|---|
| 794 | prev_time = table[i].sample.time; |
|---|
| 795 | if (!b_thread_match(thread, B_GET_STACK(table[i]))) { |
|---|
| 796 | |
|---|
| 797 | /* search thread match */ |
|---|
| 798 | for(j=i+1;j<nentries;j++) { |
|---|
| 799 | if (b_thread_match(thread, B_GET_STACK(table[j]))) { |
|---|
| 800 | break; |
|---|
| 801 | } |
|---|
| 802 | } |
|---|
| 803 | BDBG_MSG_TRACE(("filtered out entries %u..%u", i, j)); |
|---|
| 804 | if(j>=nentries) { |
|---|
| 805 | i=j; /* reached out of table */ |
|---|
| 806 | continue; |
|---|
| 807 | } |
|---|
| 808 | if (call_stack->nelem>0) { |
|---|
| 809 | /* account for the thread preemption */ |
|---|
| 810 | parent = &call_stack->stack[call_stack->nelem-1]; |
|---|
| 811 | bprofile_sample_add_diff(&parent->preempt, &table[j].sample, &table[i].sample); |
|---|
| 812 | bprofile_sample_add_diff(&parent->self, &table[i].sample, parent->child); |
|---|
| 813 | parent->child = &table[j].sample; |
|---|
| 814 | BDBG_MSG_TRACE(("filtered out %u[%u] entries %s %u[%#x:%u]...%s %u[%#x:%u] -> %u", j-i, call_stack->nelem, sys_iface->get_name(parent->addr, buf, sizeof(buf)), i, B_GET_STACK(table[i]), table[i].sample.time, sys_iface->get_name(table[j].addr, buf2, sizeof(buf2)), j, B_GET_STACK(table[j]), table[j].sample.time, parent->preempt.time)); |
|---|
| 815 | } |
|---|
| 816 | i=j; |
|---|
| 817 | prev_time = table[i].sample.time; |
|---|
| 818 | } |
|---|
| 819 | #endif |
|---|
| 820 | cur_addr = B_GET_ADDR(table[i]); |
|---|
| 821 | |
|---|
| 822 | cur_sample = &table[i].sample; |
|---|
| 823 | switch(B_GET_EVENT(table[i])) { |
|---|
| 824 | default: |
|---|
| 825 | BDBG_ASSERT(0); |
|---|
| 826 | break; |
|---|
| 827 | case B_PROFILE_EVENT_ENTER: |
|---|
| 828 | BDBG_ASSERT(call_stack->nelem < BPROF_CALL_MAX_STACK); |
|---|
| 829 | /* initialize call-stack entry */ |
|---|
| 830 | cur = &call_stack->stack[call_stack->nelem]; |
|---|
| 831 | cur->addr = cur_addr; |
|---|
| 832 | cur->enter = cur_sample; |
|---|
| 833 | cur->child = cur_sample; |
|---|
| 834 | cur->call_count = 0; |
|---|
| 835 | bprofile_sample_init(&cur->self); |
|---|
| 836 | bprofile_sample_init(&cur->all); |
|---|
| 837 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 838 | cur->stack = B_GET_STACK(table[i]); |
|---|
| 839 | bprofile_sample_init(&cur->preempt); |
|---|
| 840 | #endif |
|---|
| 841 | if(call_stack->nelem>0) { |
|---|
| 842 | /* update parent's self_time */ |
|---|
| 843 | parent = &call_stack->stack[call_stack->nelem-1]; |
|---|
| 844 | parent->call_count++; |
|---|
| 845 | bprofile_sample_add_diff(&parent->self, cur_sample, parent->child); |
|---|
| 846 | } |
|---|
| 847 | call_stack->nelem++; |
|---|
| 848 | break; |
|---|
| 849 | case B_PROFILE_EVENT_EXIT: |
|---|
| 850 | |
|---|
| 851 | pos = b_prof_get_pos(res, &size, cur_addr, sys_iface); |
|---|
| 852 | if (pos<0) { |
|---|
| 853 | continue; |
|---|
| 854 | } |
|---|
| 855 | BDBG_ASSERT(res[pos].addr == cur_addr); |
|---|
| 856 | if (call_stack->nelem>0) { |
|---|
| 857 | /* remove element from the top of stack */ |
|---|
| 858 | call_stack->nelem--; |
|---|
| 859 | cur = &call_stack->stack[call_stack->nelem]; |
|---|
| 860 | if(cur->addr != cur_addr) { |
|---|
| 861 | unsigned drop; |
|---|
| 862 | |
|---|
| 863 | for(drop=0;drop<=call_stack->nelem;drop++) { |
|---|
| 864 | if(call_stack->stack[call_stack->nelem-drop].addr == cur_addr) { |
|---|
| 865 | break; |
|---|
| 866 | } |
|---|
| 867 | } |
|---|
| 868 | BDBG_MSG(("out of sync:%u(%u) entry %u %s:%s pop %u", drop, call_stack->nelem, i, sys_iface->get_name(cur->addr, cntx->buf, sizeof(cntx->buf)), sys_iface->get_name(table[i].addr, cntx->buf2, sizeof(cntx->buf2)), i )); |
|---|
| 869 | if (drop>call_stack->nelem) { |
|---|
| 870 | call_stack->nelem++; /* unpaired exit, ignore it */ |
|---|
| 871 | break; |
|---|
| 872 | } |
|---|
| 873 | } |
|---|
| 874 | res[pos].count++; |
|---|
| 875 | res[pos].child_count += cur->call_count; |
|---|
| 876 | bprofile_sample_add_diff(&cur->all, cur_sample, cur->enter); |
|---|
| 877 | bprofile_sample_add(&res[pos].all, &cur->all); |
|---|
| 878 | bprofile_sample_add_diff(&cur->self, cur_sample, cur->child); |
|---|
| 879 | bprofile_sample_add(&res[pos].self, &cur->self); |
|---|
| 880 | /* BDBG_WRN(("%u/%u", cur->preempt.time, call_stack->nelem)); */ |
|---|
| 881 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 882 | bprofile_sample_add(&res[pos].preempt, &cur->preempt); |
|---|
| 883 | #endif |
|---|
| 884 | if (call_stack->nelem>0) { |
|---|
| 885 | /* update parent's child exit time */ |
|---|
| 886 | parent = &call_stack->stack[call_stack->nelem-1]; |
|---|
| 887 | parent->child = cur_sample; |
|---|
| 888 | } |
|---|
| 889 | } |
|---|
| 890 | |
|---|
| 891 | break; |
|---|
| 892 | } |
|---|
| 893 | } |
|---|
| 894 | /* handle functions that didn't exit yet */ |
|---|
| 895 | for(i=0;i<call_stack->nelem;i++) { |
|---|
| 896 | cur = &call_stack->stack[i]; |
|---|
| 897 | pos = b_prof_get_pos(res, &size, cur->addr, sys_iface); |
|---|
| 898 | if (pos<0) { |
|---|
| 899 | continue; |
|---|
| 900 | } |
|---|
| 901 | BDBG_ASSERT(res[pos].addr == cur->addr); |
|---|
| 902 | res[pos].count++; |
|---|
| 903 | bprofile_sample_add_diff(&cur->all, cur->child, cur->enter); |
|---|
| 904 | bprofile_sample_add(&res[pos].all, &cur->all); |
|---|
| 905 | bprofile_sample_add(&res[pos].self, &cur->self); |
|---|
| 906 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 907 | bprofile_sample_add(&res[pos].preempt, &cur->preempt); |
|---|
| 908 | #endif |
|---|
| 909 | } |
|---|
| 910 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 911 | if(sys_iface->split_threads && size > 0) { |
|---|
| 912 | unsigned load = cntx->total_time/1000; |
|---|
| 913 | if (load>0) { |
|---|
| 914 | load = thread_time/load; |
|---|
| 915 | } |
|---|
| 916 | /* print out per thread results */ |
|---|
| 917 | BKNI_Printf("thread: '%s' %3u.%1u%% self %u msec\n", sys_iface->thread_name(thread->thread_key), load/10, load%10, b_tick2ms(thread_time)); |
|---|
| 918 | b_profile_report_flat_print(cntx, res, size, thread_time); |
|---|
| 919 | size = 0; |
|---|
| 920 | } |
|---|
| 921 | #endif |
|---|
| 922 | } |
|---|
| 923 | #ifndef BPROFILE_CFG_SINGLE_THREAD |
|---|
| 924 | if(!sys_iface->split_threads) |
|---|
| 925 | #endif |
|---|
| 926 | { |
|---|
| 927 | b_profile_report_flat_print(cntx, res, size, cntx->total_time); |
|---|
| 928 | } |
|---|
| 929 | |
|---|
| 930 | BKNI_Free(res); |
|---|
| 931 | return size; |
|---|
| 932 | } |
|---|
| 933 | |
|---|
| 934 | size_t |
|---|
| 935 | bprofile_report_flat(bprofile_entry *table, size_t nentries, const bprofile_sys_iface *sys_iface) |
|---|
| 936 | { |
|---|
| 937 | return b_profile_report(b_profile_report_flat, table, nentries, sys_iface); |
|---|
| 938 | } |
|---|
| 939 | |
|---|
| 940 | static const char * |
|---|
| 941 | b_profile_no_get_name(uint32_t addr, char *buf, size_t buf_len) |
|---|
| 942 | { |
|---|
| 943 | BKNI_Snprintf(buf, buf_len, "%08lx", (unsigned long)addr); |
|---|
| 944 | return buf; |
|---|
| 945 | } |
|---|
| 946 | |
|---|
| 947 | static bprofile_thread_id |
|---|
| 948 | b_profile_no_thread_from_stack(const uint32_t *stack) |
|---|
| 949 | { |
|---|
| 950 | BSTD_UNUSED(stack); |
|---|
| 951 | return (bprofile_thread_id)(unsigned)b_profile_no_thread_from_stack; |
|---|
| 952 | } |
|---|
| 953 | |
|---|
| 954 | static const char * |
|---|
| 955 | b_profile_no_thread_name(bprofile_thread_id thread) |
|---|
| 956 | { |
|---|
| 957 | BSTD_UNUSED(thread); |
|---|
| 958 | return ""; |
|---|
| 959 | } |
|---|
| 960 | |
|---|
| 961 | void |
|---|
| 962 | bprofile_sys_iface_init(bprofile_sys_iface *sys_iface) |
|---|
| 963 | { |
|---|
| 964 | BDBG_ASSERT(sys_iface); |
|---|
| 965 | sys_iface->get_name = b_profile_no_get_name; |
|---|
| 966 | sys_iface->thread_from_stack = b_profile_no_thread_from_stack; |
|---|
| 967 | sys_iface->thread_name = b_profile_no_thread_name; |
|---|
| 968 | sys_iface->maxentries = 256; |
|---|
| 969 | sys_iface->show_entries = 256; |
|---|
| 970 | sys_iface->split_threads = false; |
|---|
| 971 | sys_iface->substract_overhead = false; |
|---|
| 972 | sys_iface->call_count = false; |
|---|
| 973 | sys_iface->preempt_time = false; |
|---|
| 974 | return; |
|---|
| 975 | } |
|---|