From 0497ab42f31e6a25d9887836123f9cf72498d3b5 Mon Sep 17 00:00:00 2001 From: kib Date: Sat, 10 Mar 2012 08:49:44 +0000 Subject: [PATCH 130/175] Optimize tls_get_addr_common(). The change provides around 30% speedup for TLS microbenchmark using global-dynamic TLS model on amd64 (which is default for PIC dso objects). Split the slow path into tls_get_addr_slow(), for which inlining is disabled. This prevents the registers spill on tls_get_addr_common() entry. Provide static branch hint to the compiler, indicating that slow path is not likely to be taken. While there, do some minimal style adjustments. Reported and tested by: davidxu MFC after: 1 week git-svn-id: http://svn.freebsd.org/base/head@232777 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f (cherry picked from commit 34bd392ae797322a1a061086e4ff7ec463f5d117) Signed-off-by: Xin Li --- libexec/rtld-elf/rtld.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 568d10b..b8524da 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -3507,17 +3507,17 @@ unref_dag(Obj_Entry *root) /* * Common code for MD __tls_get_addr(). */ -void * -tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset) +static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline; +static void * +tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset) { - Elf_Addr* dtv = *dtvp; + Elf_Addr *newdtv, *dtv; RtldLockState lockstate; + int to_copy; + dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (dtv[0] != tls_dtv_generation) { - Elf_Addr* newdtv; - int to_copy; - wlock_acquire(rtld_bind_lock, &lockstate); newdtv = calloc(1, (tls_max_index + 2) * sizeof(Elf_Addr)); to_copy = dtv[1]; @@ -3532,14 +3532,27 @@ tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset) } /* Dynamically allocate module TLS if necessary */ - if (!dtv[index + 1]) { + if (dtv[index + 1] == 0) { /* Signal safe, wlock will block out signals. */ - wlock_acquire(rtld_bind_lock, &lockstate); + wlock_acquire(rtld_bind_lock, &lockstate); if (!dtv[index + 1]) dtv[index + 1] = (Elf_Addr)allocate_module_tls(index); lock_release(rtld_bind_lock, &lockstate); } - return (void*) (dtv[index + 1] + offset); + return ((void *)(dtv[index + 1] + offset)); +} + +void * +tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset) +{ + Elf_Addr *dtv; + + dtv = *dtvp; + /* Check dtv generation in case new modules have arrived */ + if (__predict_true(dtv[0] == tls_dtv_generation && + dtv[index + 1] != 0)) + return ((void *)(dtv[index + 1] + offset)); + return (tls_get_addr_slow(dtvp, index, offset)); } #if defined(__arm__) || defined(__ia64__) || defined(__mips__) || defined(__powerpc__) -- 1.7.9.4