2b77bdde97
Profiles show false sharing in addr_compare() because refcnt/dtime changes dirty the first inet_peer cache line, where are lying the keys used at lookup time. If many cpus are calling inet_getpeer() and inet_putpeer(), or need frag ids, addr_compare() is in 2nd position in "perf top". Before patch, my udpflood bench (16 threads) on my 2x4x2 machine : 5784.00 9.7% csum_partial_copy_generic [kernel] 3356.00 5.6% addr_compare [kernel] 2638.00 4.4% fib_table_lookup [kernel] 2625.00 4.4% ip_fragment [kernel] 1934.00 3.2% neigh_lookup [kernel] 1617.00 2.7% udp_sendmsg [kernel] 1608.00 2.7% __ip_route_output_key [kernel] 1480.00 2.5% __ip_append_data [kernel] 1396.00 2.3% kfree [kernel] 1195.00 2.0% kmem_cache_free [kernel] 1157.00 1.9% inet_getpeer [kernel] 1121.00 1.9% neigh_resolve_output [kernel] 1012.00 1.7% dev_queue_xmit [kernel] # time ./udpflood.sh real 0m44.511s user 0m20.020s sys 11m22.780s # time ./udpflood.sh real 0m44.099s user 0m20.140s sys 11m15.870s After patch, no more addr_compare() in profiles : 4171.00 10.7% csum_partial_copy_generic [kernel] 1787.00 4.6% fib_table_lookup [kernel] 1756.00 4.5% ip_fragment [kernel] 1234.00 3.2% udp_sendmsg [kernel] 1191.00 3.0% neigh_lookup [kernel] 1118.00 2.9% __ip_append_data [kernel] 1022.00 2.6% kfree [kernel] 993.00 2.5% __ip_route_output_key [kernel] 841.00 2.2% neigh_resolve_output [kernel] 816.00 2.1% kmem_cache_free [kernel] 658.00 1.7% ia32_sysenter_target [kernel] 632.00 1.6% kmem_cache_alloc_node [kernel] # time ./udpflood.sh real 0m41.587s user 0m19.190s sys 10m36.370s # time ./udpflood.sh real 0m41.486s user 0m19.290s sys 10m33.650s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> |
||
---|---|---|
.. | ||
9p | ||
bluetooth | ||
caif | ||
irda | ||
iucv | ||
netfilter | ||
netns | ||
phonet | ||
sctp | ||
tc_act | ||
act_api.h | ||
addrconf.h | ||
af_ieee802154.h | ||
af_rxrpc.h | ||
af_unix.h | ||
ah.h | ||
arp.h | ||
atmclip.h | ||
ax25.h | ||
ax88796.h | ||
cfg80211.h | ||
checksum.h | ||
cipso_ipv4.h | ||
cls_cgroup.h | ||
compat.h | ||
datalink.h | ||
dcbevent.h | ||
dcbnl.h | ||
dn.h | ||
dn_dev.h | ||
dn_fib.h | ||
dn_neigh.h | ||
dn_nsp.h | ||
dn_route.h | ||
dsa.h | ||
dsfield.h | ||
dst.h | ||
dst_ops.h | ||
esp.h | ||
ethoc.h | ||
fib_rules.h | ||
flow.h | ||
garp.h | ||
gen_stats.h | ||
genetlink.h | ||
gre.h | ||
icmp.h | ||
ieee80211_radiotap.h | ||
ieee802154.h | ||
ieee802154_netdev.h | ||
if_inet6.h | ||
inet6_connection_sock.h | ||
inet6_hashtables.h | ||
inet_common.h | ||
inet_connection_sock.h | ||
inet_ecn.h | ||
inet_frag.h | ||
inet_hashtables.h | ||
inet_sock.h | ||
inet_timewait_sock.h | ||
inetpeer.h | ||
ip.h | ||
ip6_checksum.h | ||
ip6_fib.h | ||
ip6_route.h | ||
ip6_tunnel.h | ||
ip_fib.h | ||
ip_vs.h | ||
ipcomp.h | ||
ipconfig.h | ||
ipip.h | ||
ipv6.h | ||
ipx.h | ||
iw_handler.h | ||
lapb.h | ||
lib80211.h | ||
llc.h | ||
llc_c_ac.h | ||
llc_c_ev.h | ||
llc_c_st.h | ||
llc_conn.h | ||
llc_if.h | ||
llc_pdu.h | ||
llc_s_ac.h | ||
llc_s_ev.h | ||
llc_s_st.h | ||
llc_sap.h | ||
mac80211.h | ||
mip6.h | ||
mld.h | ||
ndisc.h | ||
neighbour.h | ||
net_namespace.h | ||
net_ratelimit.h | ||
netdma.h | ||
netevent.h | ||
netlabel.h | ||
netlink.h | ||
netrom.h | ||
nexthop.h | ||
nl802154.h | ||
p8022.h | ||
ping.h | ||
pkt_cls.h | ||
pkt_sched.h | ||
protocol.h | ||
psnap.h | ||
raw.h | ||
rawv6.h | ||
red.h | ||
regulatory.h | ||
request_sock.h | ||
rose.h | ||
route.h | ||
rtnetlink.h | ||
sch_generic.h | ||
scm.h | ||
slhc_vj.h | ||
snmp.h | ||
sock.h | ||
stp.h | ||
tcp.h | ||
tcp_states.h | ||
timewait_sock.h | ||
transp_v6.h | ||
udp.h | ||
udplite.h | ||
wext.h | ||
wimax.h | ||
wpan-phy.h | ||
x25.h | ||
x25device.h | ||
xfrm.h |