| [2] | 1 | /*************************************************************************** |
|---|
| 2 | * Copyright (c) 2006, Broadcom Corporation |
|---|
| 3 | * All Rights Reserved |
|---|
| 4 | * Confidential Property of Broadcom Corporation |
|---|
| 5 | * |
|---|
| 6 | * THIS SOFTWARE MAY ONLY BE USED SUBJECT TO AN EXECUTED SOFTWARE LICENSE |
|---|
| 7 | * AGREEMENT BETWEEN THE USER AND BROADCOM. YOU HAVE NO RIGHT TO USE OR |
|---|
| 8 | * EXPLOIT THIS MATERIAL EXCEPT SUBJECT TO THE TERMS OF SUCH AN AGREEMENT. |
|---|
| 9 | * |
|---|
| 10 | * $brcm_Workfile: bprofile_data.c $ |
|---|
| 11 | * $brcm_Revision: 13 $ |
|---|
| 12 | * $brcm_Date: 12/14/06 4:37p $ |
|---|
| 13 | * |
|---|
| 14 | * Module Description: |
|---|
| 15 | * |
|---|
| 16 | * Embeddable profiler library |
|---|
| 17 | * Data acquisition module |
|---|
| 18 | * |
|---|
| 19 | * Revision History: |
|---|
| 20 | * |
|---|
| 21 | * $brcm_Log: /BSEAV/lib/bprofile/bprofile_data.c $ |
|---|
| 22 | * |
|---|
| 23 | * 13 12/14/06 4:37p vsilyaev |
|---|
| 24 | * PR 25997: More accurate adjusting of profile data |
|---|
| 25 | * |
|---|
| 26 | * 12 12/13/06 7:44p vsilyaev |
|---|
| 27 | * PR 25997: Somehow better calibration routines |
|---|
| 28 | * |
|---|
| 29 | * 11 12/11/06 6:21p vsilyaev |
|---|
| 30 | * PR 25997: Added mode to eleminate estimated overhead of instrumented |
|---|
| 31 | * code |
|---|
| 32 | * |
|---|
| 33 | * 10 12/11/06 12:45p vsilyaev |
|---|
| 34 | * PR 25997: Added calibrate routine |
|---|
| 35 | * |
|---|
| 36 | * 9 12/9/06 12:04p vsilyaev |
|---|
| 37 | * PR 25997: Improved posprocessing of multi-thread profile data |
|---|
| 38 | * |
|---|
| 39 | * 8 12/8/06 7:22p vsilyaev |
|---|
| 40 | * PR 25997: calibrate profiler only during the first run |
|---|
| 41 | * |
|---|
| 42 | * 7 12/7/06 2:43p vsilyaev |
|---|
| 43 | * PR 25997: Added fixes for 3.4 GCC compiler |
|---|
| 44 | * |
|---|
| 45 | * 6 12/5/06 4:31p vsilyaev |
|---|
| 46 | * PR 25997: Added MIPS performance counters |
|---|
| 47 | * |
|---|
| 48 | * 5 12/5/06 11:59a vsilyaev |
|---|
| 49 | * PR 25997: Added faster, single threaded, probe routine |
|---|
| 50 | * |
|---|
| 51 | * 4 11/30/06 8:33p vsilyaev |
|---|
| 52 | * PR 25997: Use atomic variable to prevent missing data |
|---|
| 53 | * |
|---|
| 54 | * 3 11/28/06 4:48p vsilyaev |
|---|
| 55 | * PR 25997: Added multithreaded profiling |
|---|
| 56 | * |
|---|
| 57 | * 2 11/16/06 6:59p vsilyaev |
|---|
| 58 | * PR 25997: Added UCOS support |
|---|
| 59 | * |
|---|
| 60 | * 1 11/16/06 5:29p vsilyaev |
|---|
| 61 | * PR 25997: Embeddable profiler |
|---|
| 62 | * |
|---|
| 63 | *******************************************************************************/ |
|---|
| 64 | #include "bstd.h" |
|---|
| 65 | #include "bprofile.h" |
|---|
| 66 | #include "bkni.h" |
|---|
| 67 | #include "batomic.h" |
|---|
| 68 | #include "btrc.h" |
|---|
| 69 | #include "bprofile_tick.h" |
|---|
| 70 | |
|---|
| 71 | |
|---|
| 72 | |
|---|
/* Debug module declaration for this file; the BDBG_WRN messages in this file belong to it. */
BDBG_MODULE(bprofile);

/*
 * Marks a function so that the compiler does not insert the
 * __cyg_profile_func_enter/__cyg_profile_func_exit hooks into it.
 * The profiler's own recording path carries this marker.
 */
#define DONT_PROFILE __attribute__((no_instrument_function))
|---|
| 76 | |
|---|
| 77 | struct bprofile_state { |
|---|
| 78 | /* bprofile_entry *next; */ |
|---|
| 79 | batomic_t next; |
|---|
| 80 | batomic_t last; |
|---|
| 81 | const bprofile_entry *first; |
|---|
| 82 | bprofile_probe_info info; |
|---|
| 83 | }; |
|---|
| 84 | |
|---|
| 85 | #define B_PROFILE_SEED 1 |
|---|
| 86 | |
|---|
| 87 | static struct bprofile_state b_profile_state = { |
|---|
| 88 | BATOMIC_INIT(B_PROFILE_SEED), |
|---|
| 89 | BATOMIC_INIT(B_PROFILE_SEED), |
|---|
| 90 | (void *)B_PROFILE_SEED, |
|---|
| 91 | { |
|---|
| 92 | {0 /* time */ |
|---|
| 93 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 94 | ,{0 |
|---|
| 95 | #if BPROFILE_CFG_PERF_COUNTER > 1 |
|---|
| 96 | ,0 |
|---|
| 97 | #endif |
|---|
| 98 | #if BPROFILE_CFG_PERF_COUNTER > 2 |
|---|
| 99 | ,0 |
|---|
| 100 | #endif |
|---|
| 101 | #if BPROFILE_CFG_PERF_COUNTER > 3 |
|---|
| 102 | ,0 |
|---|
| 103 | #endif |
|---|
| 104 | } |
|---|
| 105 | #endif |
|---|
| 106 | }} |
|---|
| 107 | }; |
|---|
| 108 | |
|---|
| 109 | void DONT_PROFILE |
|---|
| 110 | b__profile_start(bprofile_entry *table, size_t nelem) |
|---|
| 111 | { |
|---|
| 112 | if (nelem>4) { |
|---|
| 113 | BKNI_Memset(table, 0, sizeof(*table)*nelem); |
|---|
| 114 | b_profile_state.first = table; |
|---|
| 115 | batomic_set(&b_profile_state.next, ((unsigned)table)-sizeof(*table)); |
|---|
| 116 | batomic_set(&b_profile_state.last, ((unsigned)table)+(nelem-4)*sizeof(*table)); |
|---|
| 117 | } |
|---|
| 118 | return; |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | |
|---|
| 122 | int DONT_PROFILE |
|---|
| 123 | bprofile_stop(void) |
|---|
| 124 | { |
|---|
| 125 | bprofile_entry *entry = (bprofile_entry *)batomic_get(&b_profile_state.next); |
|---|
| 126 | /* printf("stop first:%p last:%p next:%p %d\n", b_profile_state.first, b_profile_state.last, b_profile_state.next, b_profile_state.next - b_profile_state.first); */ |
|---|
| 127 | batomic_set(&b_profile_state.last, 1); |
|---|
| 128 | return (entry+1) - b_profile_state.first; |
|---|
| 129 | } |
|---|
| 130 | |
|---|
| 131 | int DONT_PROFILE |
|---|
| 132 | bprofile_poll(void) |
|---|
| 133 | { |
|---|
| 134 | bprofile_entry *entry = (bprofile_entry *)batomic_get(&b_profile_state.next); |
|---|
| 135 | return (entry+1) - b_profile_state.first; |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | static unsigned DONT_PROFILE |
|---|
| 139 | b_profile_dummy_0(unsigned a, unsigned b) |
|---|
| 140 | { |
|---|
| 141 | return a+b; |
|---|
| 142 | } |
|---|
| 143 | |
|---|
| 144 | static unsigned DONT_PROFILE |
|---|
| 145 | b_profile_dummy_1(unsigned a, unsigned (*dummy)(unsigned, unsigned)) |
|---|
| 146 | { |
|---|
| 147 | return a+dummy(a,a); |
|---|
| 148 | } |
|---|
| 149 | |
|---|
| 150 | static unsigned |
|---|
| 151 | b_profile_dummy_2(unsigned a, unsigned (*dummy)(unsigned, unsigned)) |
|---|
| 152 | { |
|---|
| 153 | return a+dummy(a,a); |
|---|
| 154 | } |
|---|
| 155 | |
|---|
| 156 | |
|---|
/*
 * Trace modules used by b_profile_calibrate(): 'bprofile_nosample' brackets
 * the loop over the non-instrumented dummy, 'bprofile_sample' brackets the
 * loop over the instrumented one.
 */
BTRC_MODULE(bprofile_nosample, ENABLE);
BTRC_MODULE(bprofile_sample, ENABLE);
|---|
| 159 | |
|---|
| 160 | BSTD_INLINE void |
|---|
| 161 | b_profile_sample_diff(bprofile_sample *a, const bprofile_sample *b) |
|---|
| 162 | { |
|---|
| 163 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 164 | unsigned i; |
|---|
| 165 | for(i=0;i<BPROFILE_CFG_PERF_COUNTER;i++) { |
|---|
| 166 | a->counters[i] = bperf_sample_diff(a->counters[i], b->counters[i]) ; |
|---|
| 167 | } |
|---|
| 168 | #endif |
|---|
| 169 | a->time = a->time - b->time; |
|---|
| 170 | return; |
|---|
| 171 | } |
|---|
| 172 | |
|---|
| 173 | static unsigned DONT_PROFILE |
|---|
| 174 | b_profile_calibrate(bprofile_entry *table) |
|---|
| 175 | { |
|---|
| 176 | unsigned i; |
|---|
| 177 | bprofile_sample sample0,sample1,sample2; |
|---|
| 178 | unsigned (*dummy)(unsigned, unsigned (*)(unsigned, unsigned)); /* used function pointers that function call isn't inlined by accident */ |
|---|
| 179 | const unsigned nloops = 10 * 1000; |
|---|
| 180 | unsigned time_100us; |
|---|
| 181 | b_tick2ms_init(); |
|---|
| 182 | |
|---|
| 183 | BSTD_UNUSED(table); |
|---|
| 184 | |
|---|
| 185 | dummy = b_profile_dummy_1; |
|---|
| 186 | BTRC_TRACE(bprofile_nosample, START); |
|---|
| 187 | b_sample(&sample0); |
|---|
| 188 | for(i=nloops;i>0;i--) { |
|---|
| 189 | dummy(0, b_profile_dummy_0); |
|---|
| 190 | } |
|---|
| 191 | b_sample(&sample1); |
|---|
| 192 | BTRC_TRACE(bprofile_nosample, STOP); |
|---|
| 193 | b_profile_sample_diff(&sample1, &sample0); |
|---|
| 194 | dummy = b_profile_dummy_2; |
|---|
| 195 | BTRC_TRACE(bprofile_sample, START); |
|---|
| 196 | b_sample(&sample0); |
|---|
| 197 | for(i=nloops;i>0;i--) { |
|---|
| 198 | dummy(0, b_profile_dummy_0); |
|---|
| 199 | } |
|---|
| 200 | b_sample(&sample2); |
|---|
| 201 | BTRC_TRACE(bprofile_sample, STOP); |
|---|
| 202 | b_profile_sample_diff(&sample2, &sample0); |
|---|
| 203 | if (sample0.time >= sample1.time) { |
|---|
| 204 | sample0.time = sample2.time - sample1.time; |
|---|
| 205 | } else { |
|---|
| 206 | sample0.time = 0; |
|---|
| 207 | } |
|---|
| 208 | time_100us = b_tick2_100us(sample0.time); |
|---|
| 209 | |
|---|
| 210 | BDBG_WRN(("profiler overhead %u.%02u us per function call (%u ticks)", time_100us/100, time_100us%100, sample0.time/nloops)); |
|---|
| 211 | #if BPROFILE_CFG_PERF_COUNTER |
|---|
| 212 | { |
|---|
| 213 | const bperf_counter_mode *mode = bperf_get_mode(); |
|---|
| 214 | |
|---|
| 215 | for(i=0;mode && i<BPROFILE_CFG_PERF_COUNTER;i++) { |
|---|
| 216 | if(sample2.counters[i] >= sample1.counters[i]) { |
|---|
| 217 | sample0.counters[i] = sample2.counters[i] - sample1.counters[i]; |
|---|
| 218 | } else { |
|---|
| 219 | sample0.counters[i] = 0; |
|---|
| 220 | } |
|---|
| 221 | b_profile_state.info.overhead.counters[i] = sample0.counters[i]/nloops; |
|---|
| 222 | BDBG_WRN(("profiler %s overhead %u per function call %u", mode->counter_names[i], b_profile_state.info.overhead.counters[i], sample2.counters[i])); |
|---|
| 223 | } |
|---|
| 224 | } |
|---|
| 225 | #endif |
|---|
| 226 | |
|---|
| 227 | b_profile_state.info.overhead.time = sample2.time/nloops; |
|---|
| 228 | return b_profile_state.info.overhead.time; |
|---|
| 229 | } |
|---|
| 230 | |
|---|
| 231 | unsigned |
|---|
| 232 | bprofile_calibrate(bprofile_entry *table, size_t nelem) |
|---|
| 233 | { |
|---|
| 234 | b__profile_start(table, nelem); |
|---|
| 235 | return b_profile_calibrate(table); |
|---|
| 236 | } |
|---|
| 237 | |
|---|
| 238 | void |
|---|
| 239 | bprofile_get_info(bprofile_probe_info *info) |
|---|
| 240 | { |
|---|
| 241 | *info = b_profile_state.info; |
|---|
| 242 | return; |
|---|
| 243 | } |
|---|
| 244 | |
|---|
| 245 | void DONT_PROFILE |
|---|
| 246 | bprofile_start(bprofile_entry *table, size_t nelem) |
|---|
| 247 | { |
|---|
| 248 | b__profile_start(table, nelem); |
|---|
| 249 | return; |
|---|
| 250 | } |
|---|
| 251 | |
|---|
| 252 | BSTD_INLINE void DONT_PROFILE |
|---|
| 253 | b_addentry(unsigned type, void *func) |
|---|
| 254 | { |
|---|
| 255 | unsigned event; |
|---|
| 256 | #if BPROFILE_CFG_SINGLE_THREAD |
|---|
| 257 | bprofile_entry *entry = (bprofile_entry *)(batomic_get(&b_profile_state.next)+sizeof(*entry)); |
|---|
| 258 | if ( (unsigned)entry < (unsigned)batomic_get(&b_profile_state.last)) { |
|---|
| 259 | event = (unsigned)func; |
|---|
| 260 | #if 0 |
|---|
| 261 | event &= &(~B_PROFILE_EVENT_MASK); /* function address has to be 32 bit alligned */ |
|---|
| 262 | #endif |
|---|
| 263 | event |= type; |
|---|
| 264 | entry->addr = event; |
|---|
| 265 | batomic_set(&b_profile_state.next, (int)entry); |
|---|
| 266 | b_sample(&entry->sample); |
|---|
| 267 | } |
|---|
| 268 | #else |
|---|
| 269 | if ( (unsigned)batomic_get(&b_profile_state.next) < (unsigned)batomic_get(&b_profile_state.last)) { |
|---|
| 270 | bprofile_entry *entry = (bprofile_entry *)batomic_add_return(&b_profile_state.next,sizeof(*entry)); |
|---|
| 271 | b_get_stack(&event); |
|---|
| 272 | #if 0 |
|---|
| 273 | event &= (~B_PROFILE_EVENT_MASK); /* stack pointer has to be 32 bit alligned */ |
|---|
| 274 | #endif |
|---|
| 275 | event= event | type; |
|---|
| 276 | entry->event_0 = event; |
|---|
| 277 | entry->addr = (unsigned)func; |
|---|
| 278 | b_sample(&entry->sample); |
|---|
| 279 | } |
|---|
| 280 | #endif |
|---|
| 281 | return; |
|---|
| 282 | } |
|---|
| 283 | |
|---|
| 284 | |
|---|
| 285 | void DONT_PROFILE |
|---|
| 286 | __cyg_profile_func_enter (void *func, void *caller) |
|---|
| 287 | { |
|---|
| 288 | BSTD_UNUSED(caller); |
|---|
| 289 | b_addentry(B_PROFILE_EVENT_ENTER, func); |
|---|
| 290 | } |
|---|
| 291 | |
|---|
| 292 | void DONT_PROFILE |
|---|
| 293 | __cyg_profile_func_exit (void *func, void *caller) |
|---|
| 294 | { |
|---|
| 295 | BSTD_UNUSED(caller); |
|---|
| 296 | b_addentry(B_PROFILE_EVENT_EXIT, func); |
|---|
| 297 | } |
|---|
| 298 | |
|---|