Skip to content

Commit c5c817f

Browse files
committed
Prepare getting rid of CompleteSpan
1 parent 693b134 commit c5c817f

File tree

6 files changed

+139
-104
lines changed

6 files changed

+139
-104
lines changed

src/workerd/io/trace.c++

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,14 +1516,11 @@ CompleteSpan::CompleteSpan(rpc::UserSpanData::Reader reader)
15161516
}
15171517
}
15181518

1519-
CompleteSpan CompleteSpan::clone() const {
1520-
CompleteSpan copy(spanId, parentSpanId, operationName.clone(), startTime, endTime);
1521-
copy.tags.reserve(tags.size());
1522-
for (auto& tag: tags) {
1523-
copy.tags.insert(tag.key.clone(), spanTagClone(tag.value));
1524-
}
1525-
return copy;
1526-
}
1519+
SpanOpenData::SpanOpenData(rpc::SpanOpenData::Reader reader)
1520+
: spanId(reader.getSpanId()),
1521+
parentSpanId(reader.getParentSpanId()),
1522+
operationName(kj::str(reader.getOperationName())),
1523+
startTime(kj::UNIX_EPOCH + reader.getStartTimeNs() * kj::NANOSECONDS) {}
15271524

15281525
void SpanOpenData::copyTo(rpc::SpanOpenData::Builder builder) const {
15291526
builder.setOperationName(operationName.asPtr());
@@ -1532,12 +1529,37 @@ void SpanOpenData::copyTo(rpc::SpanOpenData::Builder builder) const {
15321529
builder.setParentSpanId(parentSpanId);
15331530
}
15341531

1535-
SpanOpenData::SpanOpenData(rpc::SpanOpenData::Reader reader)
1532+
SpanEndData::SpanEndData(CompleteSpan&& span)
1533+
: spanId(span.spanId),
1534+
startTime(span.startTime),
1535+
endTime(span.endTime),
1536+
tags(kj::mv(span.tags)) {}
1537+
1538+
SpanEndData::SpanEndData(rpc::SpanEndData::Reader reader)
15361539
: spanId(reader.getSpanId()),
1537-
parentSpanId(reader.getParentSpanId()),
1538-
operationName(kj::str(reader.getOperationName())),
1539-
startTime(kj::UNIX_EPOCH + reader.getStartTimeNs() * kj::NANOSECONDS) {}
1540+
startTime(kj::UNIX_EPOCH + reader.getStartTimeNs() * kj::NANOSECONDS),
1541+
endTime(kj::UNIX_EPOCH + reader.getEndTimeNs() * kj::NANOSECONDS) {
1542+
auto tagsParam = reader.getTags();
1543+
tags.reserve(tagsParam.size());
1544+
for (auto tagParam: tagsParam) {
1545+
tags.insert(kj::ConstString(kj::heapString(tagParam.getKey())),
1546+
deserializeTagValue(tagParam.getValue()));
1547+
}
1548+
}
1549+
1550+
void SpanEndData::copyTo(rpc::SpanEndData::Builder builder) const {
1551+
builder.setStartTimeNs((startTime - kj::UNIX_EPOCH) / kj::NANOSECONDS);
1552+
builder.setEndTimeNs((endTime - kj::UNIX_EPOCH) / kj::NANOSECONDS);
1553+
builder.setSpanId(spanId);
15401554

1555+
auto tagsParam = builder.initTags(tags.size());
1556+
auto i = 0;
1557+
for (auto& tag: tags) {
1558+
auto tagParam = tagsParam[i++];
1559+
tagParam.setKey(tag.key.asPtr());
1560+
serializeTagValue(tagParam.initValue(), tag.value);
1561+
}
1562+
}
15411563
} // namespace tracing
15421564

15431565
// ======================================================================================

src/workerd/io/trace.capnp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ struct UserSpanData {
4343
}
4444

4545
struct SpanOpenData {
46-
# Representation of a SpanOpen event
46+
# Representation of a SpanOpen event, created when a user span is opened.
4747
operationName @0 :Text;
4848

4949
startTimeNs @1 :Int64;
@@ -55,15 +55,15 @@ struct SpanOpenData {
5555

5656
struct SpanEndData {
5757
# Representation of an event that indicates completion of a user span. This information is
58-
# provided to the tail worker in the Attributes and SpanClose events.
58+
# provided to the streaming tail worker in the Attributes and SpanClose events.
5959

60-
# TODO: These can probably go?
61-
operationName @0 :Text;
62-
startTimeNs @1 :Int64;
60+
# TODO(cleanup): startTimeNs is merely used as a fallback timestamp, consider obsoleting it.
61+
startTimeNs @0 :Int64;
6362
# Nanoseconds since Unix epoch
64-
endTimeNs @2 :Int64;
63+
endTimeNs @1 :Int64;
6564
# Nanoseconds since Unix epoch
6665

67-
tags @3 :List(Tag);
68-
spanId @4 :UInt64;
69-
}
66+
# List of span attributes
67+
tags @2 :List(Tag);
68+
spanId @3 :UInt64;
69+
}

src/workerd/io/trace.h

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,6 @@ struct CompleteSpan {
632632

633633
CompleteSpan(rpc::UserSpanData::Reader reader);
634634
void copyTo(rpc::UserSpanData::Builder builder) const;
635-
// TODO: Is clone() still needed?
636-
CompleteSpan clone() const;
637635
explicit CompleteSpan(tracing::SpanId spanId,
638636
tracing::SpanId parentSpanId,
639637
kj::ConstString operationName,
@@ -669,6 +667,30 @@ struct SpanOpenData {
669667
startTime(startTime) {}
670668
};
671669

670+
struct SpanEndData {
671+
// Represents the data needed when closing a span, including the Attributes and SpanClose events.
672+
tracing::SpanId spanId;
673+
674+
kj::Date startTime;
675+
kj::Date endTime;
676+
// Should be Span::TagMap, but we can't forward-declare that.
677+
kj::HashMap<kj::ConstString, tracing::Attribute::Value> tags;
678+
679+
// Convert CompleteSpan to SpanEndData
680+
explicit SpanEndData(CompleteSpan&& span);
681+
SpanEndData(rpc::SpanEndData::Reader reader);
682+
void copyTo(rpc::SpanEndData::Builder builder) const;
683+
explicit SpanEndData(tracing::SpanId spanId,
684+
kj::Date startTime,
685+
kj::Date endTime,
686+
kj::HashMap<kj::ConstString, tracing::Attribute::Value> tags =
687+
kj::HashMap<kj::ConstString, tracing::Attribute::Value>())
688+
: spanId(spanId),
689+
startTime(startTime),
690+
endTime(endTime),
691+
tags(kj::mv(tags)) {}
692+
};
693+
672694
// A Return mark is used to mark the point at which a span operation returned
673695
// a value. For instance, when a fetch subrequest response is received, or when
674696
// the fetch handler returns a Response. Importantly, it does not signal that the
@@ -1132,10 +1154,10 @@ class SpanObserver: public kj::Refcounted {
11321154

11331155
// Report the span data. Called at the end of the span.
11341156
//
1135-
// This should always be called exactly once per observer.
1157+
// This should always be called exactly once per observer at span completion time.
11361158
virtual void report(const Span& span) = 0;
1159+
// Report information about the span onset.
11371160
virtual void reportStart(kj::ConstString& operationName, kj::Date startTime) = 0;
1138-
virtual void reportEnd(const Span& span) = 0;
11391161

11401162
// The current time to be provided for the span. For user tracing, we will override this to
11411163
// provide I/O time. This *requires* that spans are only created when an IOContext is available

src/workerd/io/tracer.c++

Lines changed: 58 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -124,71 +124,17 @@ void WorkerTracer::addLog(const tracing::InvocationSpanContext& context,
124124
}
125125

126126
void WorkerTracer::addSpan(tracing::CompleteSpan&& span) {
127+
// The span information is not transmitted via RPC at this point, we can decompose the span into
128+
// spanOpen/spanEnd.
127129
addSpanOpen(span.spanId, span.parentSpanId, span.operationName, span.startTime);
128-
addSpanEnd(kj::mv(span));
129-
#if 0
130-
// This is where we'll actually encode the span.
131-
if (pipelineLogLevel == PipelineLogLevel::NONE) {
132-
return;
133-
}
134-
135-
// Note: spans are not available in the buffered tail worker, so we don't need an exceededSpanLimit
136-
// variable for it and it can't cause truncation.
137-
auto& tailStreamWriter = KJ_UNWRAP_OR_RETURN(maybeTailStreamWriter);
138-
139-
adjustSpanTime(span);
140-
141-
size_t spanTagsSize = 0;
142-
size_t spanNameSize = span.operationName.size();
143-
for (const Span::TagMap::Entry& tag: span.tags) {
144-
spanTagsSize += tag.key.size();
145-
KJ_SWITCH_ONEOF(tag.value) {
146-
KJ_CASE_ONEOF(str, kj::ConstString) {
147-
spanTagsSize += str.size();
148-
}
149-
KJ_CASE_ONEOF(val, bool) {
150-
spanTagsSize++;
151-
}
152-
// int64_t and double
153-
KJ_CASE_ONEOF_DEFAULT {
154-
spanTagsSize += sizeof(int64_t);
155-
}
156-
}
157-
}
158-
159-
// Compose span events – attributes and spanClose are transmitted together for now.
160-
auto& topLevelContext = KJ_ASSERT_NONNULL(topLevelInvocationSpanContext);
161-
// Compose span events. For SpanOpen, an all-zero spanId is interpreted as having no spans above
162-
// this one, thus we use the Onset spanId instead (taken from topLevelContext). We go to great
163-
// lengths to rule out getting an all-zero spanId by chance (see SpanId::fromEntropy()), so this
164-
// should be safe.
165-
tracing::SpanId parentSpanId = span.parentSpanId;
166-
if (parentSpanId == tracing::SpanId::nullId) {
167-
parentSpanId = topLevelContext.getSpanId();
168-
}
169-
170-
auto spanOpenContext = tracing::InvocationSpanContext(
171-
topLevelContext.getTraceId(), topLevelContext.getInvocationId(), parentSpanId);
172-
auto spanComponentContext = tracing::InvocationSpanContext(
173-
topLevelContext.getTraceId(), topLevelContext.getInvocationId(), span.spanId);
174-
tailStreamWriter->report(spanOpenContext,
175-
tracing::SpanOpen(span.spanId, span.operationName.clone()), span.startTime, spanNameSize);
176-
// If a span manages to exceed the size limit, truncate it by not providing span attributes.
177-
if (span.tags.size() && spanTagsSize <= MAX_TRACE_BYTES) {
178-
tracing::CustomInfo attr = KJ_MAP(tag, span.tags) {
179-
return tracing::Attribute(kj::mv(tag.key), kj::mv(tag.value));
180-
};
181-
tailStreamWriter->report(spanComponentContext, kj::mv(attr), span.startTime, spanTagsSize);
182-
}
183-
tailStreamWriter->report(spanComponentContext, tracing::SpanClose(), span.endTime, 0);
184-
#endif
130+
tracing::SpanEndData spanEnd(kj::mv(span));
131+
addSpanEnd(kj::mv(spanEnd));
185132
}
186133

187134
void WorkerTracer::addSpanOpen(tracing::SpanId spanId,
188135
tracing::SpanId parentSpanId,
189136
kj::ConstString& operationName,
190137
kj::Date startTime) {
191-
// This is where we'll actually encode the span.
192138
if (pipelineLogLevel == PipelineLogLevel::NONE) {
193139
return;
194140
}
@@ -208,8 +154,7 @@ void WorkerTracer::addSpanOpen(tracing::SpanId spanId,
208154
spanOpenContext, tracing::SpanOpen(spanId, operationName.clone()), startTime, spanNameSize);
209155
}
210156

211-
void WorkerTracer::addSpanEnd(tracing::CompleteSpan&& span) {
212-
// This is where we'll actually encode the span.
157+
void WorkerTracer::addSpanEnd(tracing::SpanEndData&& span) {
213158
if (pipelineLogLevel == PipelineLogLevel::NONE) {
214159
return;
215160
}
@@ -237,7 +182,8 @@ void WorkerTracer::addSpanEnd(tracing::CompleteSpan&& span) {
237182
}
238183
}
239184

240-
// Compose span events – attributes and spanClose are transmitted together for now.
185+
// Compose Attributes and SpanClose, which are available at span completion time and transmitted
186+
// together.
241187
auto& topLevelContext = KJ_ASSERT_NONNULL(topLevelInvocationSpanContext);
242188
auto spanComponentContext = tracing::InvocationSpanContext(
243189
topLevelContext.getTraceId(), topLevelContext.getInvocationId(), span.spanId);
@@ -513,6 +459,56 @@ void BaseTracer::adjustSpanTime(tracing::CompleteSpan& span) {
513459
}
514460
}
515461

462+
void BaseTracer::adjustSpanTime(tracing::SpanEndData& span) {
463+
// To report I/O time, we need the IOContext to still be alive.
464+
// weakIoContext is only none if we are tracing via RPC (in this case span times have already been
465+
// adjusted) or if we failed to transmit an Onset event (in that case we'll get an error based on
466+
// missing topLevelInvocationSpanContext right after).
467+
if (weakIoContext != kj::none) {
468+
auto& weakIoCtx = KJ_ASSERT_NONNULL(weakIoContext);
469+
weakIoCtx->runIfAlive([this, &span](IoContext& context) {
470+
if (context.hasCurrentIncomingRequest()) {
471+
span.endTime = context.now();
472+
} else {
473+
// We have an IOContext, but there's no current IncomingRequest. Always log a warning here,
474+
// this should not be happening. Still report completeTime as a useful timestamp if
475+
// available.
476+
bool hasCompleteTime = false;
477+
if (completeTime != kj::UNIX_EPOCH) {
478+
span.endTime = completeTime;
479+
hasCompleteTime = true;
480+
} else {
481+
span.endTime = span.startTime;
482+
}
483+
if (isPredictableModeForTest()) {
484+
KJ_FAIL_ASSERT("reported span without current request", hasCompleteTime);
485+
} else {
486+
LOG_WARNING_PERIODICALLY("reported span without current request");
487+
}
488+
}
489+
});
490+
if (!weakIoCtx->isValid()) {
491+
// This can happen if we start a customEvent from this event and cancel it after this IoContext
492+
// gets destroyed. In that case we no longer have an IoContext available and can't get the
493+
// current time, but the outcome timestamp will have already been set. Since the outcome
494+
// timestamp is "late enough", simply use that.
495+
// TODO(o11y): fix this – spans should not be outliving the IoContext.
496+
if (completeTime != kj::UNIX_EPOCH) {
497+
span.endTime = completeTime;
498+
} else {
499+
// Otherwise, we can't actually get an end timestamp that makes sense. Report a zero-duration
500+
// span and log a warning (or fail assert in test mode).
501+
span.endTime = span.startTime;
502+
if (isPredictableModeForTest()) {
503+
KJ_FAIL_ASSERT("reported span after IoContext was deallocated");
504+
} else {
505+
KJ_LOG(WARNING, "reported span after IoContext was deallocated");
506+
}
507+
}
508+
}
509+
}
510+
}
511+
516512
void WorkerTracer::setReturn(
517513
kj::Maybe<kj::Date> timestamp, kj::Maybe<tracing::FetchResponseInfo> fetchResponseInfo) {
518514
// Match the behavior of setEventInfo(). Any resolution of the TODO comments in setEventInfo()
@@ -588,11 +584,7 @@ void UserSpanObserver::report(const Span& span) {
588584
}
589585

590586
void UserSpanObserver::reportStart(kj::ConstString& operationName, kj::Date startTime) {
591-
submitter->submitSpanStart(spanId, parentSpanId, operationName, startTime);
592-
}
593-
594-
void UserSpanObserver::reportEnd(const Span& span) {
595-
submitter->submitSpanEnd(spanId, span);
587+
submitter->submitSpanOpen(spanId, parentSpanId, operationName, startTime);
596588
}
597589

598590
// Provide I/O time to the tracing system for user spans.

src/workerd/io/tracer.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,15 @@ class BaseTracer: public kj::Refcounted {
3131
kj::Date timestamp,
3232
LogLevel logLevel,
3333
kj::String message) = 0;
34-
// Add a span.
34+
// Add a complete span.
3535
virtual void addSpan(tracing::CompleteSpan&& span) = 0;
36+
// Add information about a span when it is opened, corresponds to SpanOpen event.
3637
virtual void addSpanOpen(tracing::SpanId spanId,
3738
tracing::SpanId parentSpanId,
3839
kj::ConstString& operationName,
3940
kj::Date startTime) = 0;
40-
virtual void addSpanEnd(tracing::CompleteSpan&& span) = 0;
41+
// Add span events when the span is complete (Attributes and SpanClose).
42+
virtual void addSpanEnd(tracing::SpanEndData&& span) = 0;
4143

4244
virtual void addException(const tracing::InvocationSpanContext& context,
4345
kj::Date timestamp,
@@ -95,6 +97,7 @@ class BaseTracer: public kj::Refcounted {
9597

9698
// helper method for addSpan() implementations
9799
void adjustSpanTime(tracing::CompleteSpan& span);
100+
void adjustSpanTime(tracing::SpanEndData& span);
98101

99102
// Function to create the root span for the new tracing format.
100103
kj::Maybe<MakeUserRequestSpanFunc> makeUserRequestSpanFunc;
@@ -135,7 +138,7 @@ class WorkerTracer final: public BaseTracer {
135138
tracing::SpanId parentSpanId,
136139
kj::ConstString& operationName,
137140
kj::Date startTime) override;
138-
void addSpanEnd(tracing::CompleteSpan&& span) override;
141+
void addSpanEnd(tracing::SpanEndData&& span) override;
139142
void addException(const tracing::InvocationSpanContext& context,
140143
kj::Date timestamp,
141144
kj::String name,
@@ -192,7 +195,7 @@ class WorkerTracer final: public BaseTracer {
192195
class SpanSubmitter: public kj::Refcounted {
193196
public:
194197
virtual void submitSpan(tracing::SpanId context, tracing::SpanId spanId, const Span& span) = 0;
195-
virtual void submitSpanStart(tracing::SpanId spanId,
198+
virtual void submitSpanOpen(tracing::SpanId spanId,
196199
tracing::SpanId parentSpanId,
197200
kj::ConstString& operationName,
198201
kj::Date startTime) = 0;
@@ -219,7 +222,6 @@ class UserSpanObserver final: public SpanObserver {
219222
kj::Own<SpanObserver> newChild() override;
220223
void report(const Span& span) override;
221224
void reportStart(kj::ConstString& operationName, kj::Date startTime) override;
222-
void reportEnd(const Span& span) override;
223225
kj::Date getTime() override;
224226

225227
private:

src/workerd/server/server.c++

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,14 +1701,14 @@ class SequentialSpanSubmitter final: public SpanSubmitter {
17011701
public:
17021702
SequentialSpanSubmitter(kj::Own<WorkerTracer> workerTracer): workerTracer(kj::mv(workerTracer)) {}
17031703
void submitSpan(tracing::SpanId spanId, tracing::SpanId parentSpanId, const Span& span) override {
1704-
// We will soon report SpanOpen and SpanClose separately. In workerd, we can already use this to
1705-
// implement submitSpan using submitSpanStart and submitSpanEnd.
1704+
// This code path is workerd-only, we can safely decompose this span into its components and
1705+
// call submitSpanOpen/submitSpanEnd instead of reimplementing them here.
17061706
kj::ConstString blah = span.operationName.clone();
1707-
submitSpanStart(spanId, parentSpanId, blah, span.startTime);
1707+
submitSpanOpen(spanId, parentSpanId, blah, span.startTime);
17081708
submitSpanEnd(spanId, span);
17091709
}
17101710

1711-
void submitSpanStart(tracing::SpanId spanId,
1711+
void submitSpanOpen(tracing::SpanId spanId,
17121712
tracing::SpanId parentSpanId,
17131713
kj::ConstString& operationName,
17141714
kj::Date startTime) override {
@@ -1719,10 +1719,7 @@ class SequentialSpanSubmitter final: public SpanSubmitter {
17191719
}
17201720

17211721
void submitSpanEnd(tracing::SpanId spanId, const Span& span) override {
1722-
// We largely recreate the span here which feels inefficient, but is hard to avoid given the
1723-
// mismatch between the Span type and the full span information required for OTel.
1724-
tracing::CompleteSpan span2(
1725-
spanId, tracing::SpanId::nullId, span.operationName.clone(), span.startTime, span.endTime);
1722+
tracing::SpanEndData span2(spanId, span.startTime, span.endTime);
17261723
span2.tags.reserve(span.tags.size());
17271724
for (auto& tag: span.tags) {
17281725
span2.tags.insert(tag.key.clone(), spanTagClone(tag.value));

0 commit comments

Comments
 (0)