Class: Datadog::Profiling::Collectors::Stack

Inherits:
Worker
  • Object
show all
Includes:
Workers::Polling
Defined in:
lib/ddtrace/profiling/collectors/stack.rb

Overview

Collects stack trace samples from Ruby threads for both CPU-time (if available) and wall-clock. Runs on its own background thread.

Constant Summary collapse

DEFAULT_MAX_TIME_USAGE_PCT =
2.0
MIN_INTERVAL =
0.01
THREAD_LAST_CPU_TIME_KEY =
:datadog_profiler_last_cpu_time
THREAD_LAST_WALL_CLOCK_KEY =
:datadog_profiler_last_wall_clock
SYNTHETIC_STACK_IN_NATIVE_CODE =
[BacktraceLocation.new('', 0, 'In native code').freeze].freeze
DEFAULT_MAX_THREADS_SAMPLED =

This default was picked based on the current sampling performance and on expected concurrency on an average Ruby MRI application. Lowering this optimizes for latency (less impact each time we sample), and raising optimizes for coverage (less chance to miss what a given thread is doing).

16

Constants included from Workers::Polling

Workers::Polling::SHUTDOWN_TIMEOUT

Instance Attribute Summary collapse

Attributes inherited from Worker

#task

Instance Method Summary collapse

Methods included from Workers::Polling

#enabled=, #enabled?, included, #stop

Constructor Details

#initialize(recorder, max_frames:, trace_identifiers_helper:, ignore_thread: nil, max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT, max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED, thread_api: Thread, cpu_time_provider: Datadog::Profiling::NativeExtension, fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, interval: MIN_INTERVAL, enabled: true) ⇒ Stack

Returns a new instance of Stack.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 40

def initialize(
  recorder,
  max_frames:,
  trace_identifiers_helper:, # Usually an instance of Datadog::Profiling::TraceIdentifiers::Helper
  ignore_thread: nil,
  max_time_usage_pct: DEFAULT_MAX_TIME_USAGE_PCT,
  max_threads_sampled: DEFAULT_MAX_THREADS_SAMPLED,
  thread_api: Thread,
  cpu_time_provider: Datadog::Profiling::NativeExtension,
  fork_policy: Workers::Async::Thread::FORK_POLICY_RESTART, # Restart in forks by default
  interval: MIN_INTERVAL,
  enabled: true
)
  @recorder = recorder
  @max_frames = max_frames
  @trace_identifiers_helper = trace_identifiers_helper
  @ignore_thread = ignore_thread
  @max_time_usage_pct = max_time_usage_pct
  @max_threads_sampled = max_threads_sampled
  @thread_api = thread_api
  # Only set the provider if it's able to work in the current Ruby/OS combo
  @cpu_time_provider = cpu_time_provider unless cpu_time_provider.cpu_time_ns_for(thread_api.current).nil?

  # Workers::Async::Thread settings
  self.fork_policy = fork_policy

  # Workers::IntervalLoop settings
  self.loop_base_interval = interval

  # Workers::Polling settings
  self.enabled = enabled

  # Cache this proc, since it's pretty expensive to keep recreating it
  @build_backtrace_location = method(:build_backtrace_location).to_proc
  # Cache this buffer, since it's pretty expensive to keep accessing it
  @stack_sample_event_recorder = recorder[Events::StackSample]
  # See below for details on why this is needed
  @needs_process_waiter_workaround =
    Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3') &&
    Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7')
end

Instance Attribute Details

#cpu_time_providerObject (readonly)

Returns the value of attribute cpu_time_provider.



31
32
33
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 31

def cpu_time_provider
  @cpu_time_provider
end

#ignore_threadObject (readonly)

Returns the value of attribute ignore_thread.



31
32
33
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 31

def ignore_thread
  @ignore_thread
end

#max_framesObject (readonly)

Returns the value of attribute max_frames.



31
32
33
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 31

def max_frames
  @max_frames
end

#max_time_usage_pctObject (readonly)

Returns the value of attribute max_time_usage_pct.



31
32
33
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 31

def max_time_usage_pct
  @max_time_usage_pct
end

#recorderObject (readonly)

Returns the value of attribute recorder.



31
32
33
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 31

def recorder
  @recorder
end

#thread_apiObject (readonly)

Returns the value of attribute thread_api.



31
32
33
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 31

def thread_api
  @thread_api
end

#trace_identifiers_helperObject (readonly)

Returns the value of attribute trace_identifiers_helper.



31
32
33
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 31

def trace_identifiers_helper
  @trace_identifiers_helper
end

Instance Method Details

#build_backtrace_location(_id, base_label, lineno, path) ⇒ Object



210
211
212
213
214
215
216
217
218
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 210

def build_backtrace_location(_id, base_label, lineno, path)
  string_table = @stack_sample_event_recorder.string_table

  Profiling::BacktraceLocation.new(
    string_table.fetch_string(base_label),
    lineno,
    string_table.fetch_string(path)
  )
end

#collect_and_waitObject



91
92
93
94
95
96
97
98
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 91

def collect_and_wait
  run_time = Datadog::Utils::Time.measure do
    collect_events
  end

  # Update wait time to throttle profiling
  self.loop_wait_time = compute_wait_time(run_time)
end

#collect_eventsObject



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 100

def collect_events
  events = []
  current_wall_time_ns = get_current_wall_time_timestamp_ns

  # Collect backtraces from each thread
  threads_to_sample.each do |thread|
    next unless thread.alive?
    next if ignore_thread.is_a?(Proc) && ignore_thread.call(thread)

    event = collect_thread_event(thread, current_wall_time_ns)
    events << event unless event.nil?
  end

  # Send events to recorder
  recorder.push(events) unless events.empty?

  events
end

#collect_thread_event(thread, current_wall_time_ns) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 119

def collect_thread_event(thread, current_wall_time_ns)
  locations = thread.backtrace_locations
  return if locations.nil?

  # Having empty locations means that the thread is alive, but we don't know what it's doing:
  #
  # 1. It can be starting up
  #    ```
  #    > Thread.new { sleep }.backtrace
  #    => [] # <-- note the thread hasn't actually started running sleep yet, we got there first
  #    ```
  # 2. It can be running native code
  #    ```
  #    > t = Process.detach(fork { sleep })
  #    => #<Process::Waiter:0x00007ffe7285f7a0 run>
  #    > t.backtrace
  #    => [] # <-- this can happen even minutes later, e.g. it's not a race as in 1.
  #    ```
  #    This effect has been observed in threads created by the Iodine web server and the ffi gem
  #
  # To give customers visibility into these threads, we replace the empty stack with one containing a
  # synthetic placeholder frame, so that these threads are properly represented in the UX.
  locations = SYNTHETIC_STACK_IN_NATIVE_CODE if locations.empty?

  # Get actual stack size then trim the stack
  stack_size = locations.length
  locations = locations[0..(max_frames - 1)]

  # Convert backtrace locations into structs
  locations = convert_backtrace_locations(locations)

  thread_id = thread.object_id
  root_span_id, span_id, trace_resource = trace_identifiers_helper.trace_identifiers_for(thread)
  cpu_time = get_cpu_time_interval!(thread)
  wall_time_interval_ns =
    get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_WALL_CLOCK_KEY, current_wall_time_ns)

  Events::StackSample.new(
    nil,
    locations,
    stack_size,
    thread_id,
    root_span_id,
    span_id,
    trace_resource,
    cpu_time,
    wall_time_interval_ns
  )
end

#compute_wait_time(used_time) ⇒ Object



179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 179

def compute_wait_time(used_time)
  # We took used_time to get the last sample.
  #
  # What we're computing here is -- if used_time corresponds to max_time_usage_pct of the time we should
  # spend working, how much is (100% - max_time_usage_pct) of the time?
  #
  # For instance, if we took 10ms to sample, and max_time_usage_pct is 1%, then the other 99% is 990ms, which
  # means we need to sleep for 990ms to guarantee that we don't spend more than 1% of the time working.
  used_time_ns = used_time * 1e9
  interval = (used_time_ns / (max_time_usage_pct / 100.0)) - used_time_ns
  [interval / 1e9, MIN_INTERVAL].max
end

#convert_backtrace_locations(locations) ⇒ Object

Convert backtrace locations into structs Re-use old backtrace location objects if they already exist in the buffer



194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 194

def convert_backtrace_locations(locations)
  locations.collect do |location|
    # Re-use existing BacktraceLocation if identical copy, otherwise build a new one.
    @stack_sample_event_recorder.cache(:backtrace_locations).fetch(
      # Function name
      location.base_label,
      # Line number
      location.lineno,
      # Filename
      location.path,
      # Build function
      &@build_backtrace_location
    )
  end
end

#get_cpu_time_interval!(thread) ⇒ Object



169
170
171
172
173
174
175
176
177
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 169

def get_cpu_time_interval!(thread)
  return unless cpu_time_provider

  current_cpu_time_ns = cpu_time_provider.cpu_time_ns_for(thread)

  return unless current_cpu_time_ns

  get_elapsed_since_last_sample_and_set_value(thread, THREAD_LAST_CPU_TIME_KEY, current_cpu_time_ns)
end

#performObject



87
88
89
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 87

def perform
  collect_and_wait
end

#startObject



82
83
84
85
# File 'lib/ddtrace/profiling/collectors/stack.rb', line 82

def start
  reset_cpu_time_tracking
  perform
end