Skip to content

Commit 39c25ea

Browse files
committed
address pr reviews
1 parent e8cb5fb commit 39c25ea

File tree

2 files changed

+26
-24
lines changed

2 files changed

+26
-24
lines changed

src/rust/encoding/lib.rs

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,13 @@ mod ffi {
 XUserDefined,
 }
 
-/// Result of a decode operation. The output pointer refers to the
+/// Result of a decode operation. The output slice borrows the
 /// decoder's internal buffer and is valid until the next `decode` or
-/// `reset` call. Encoded as `usize` because CXX shared structs cannot
-/// contain raw pointers.
-struct DecodeResult {
-/// Pointer to the UTF-16 output buffer, as `usize`.
-/// Cast to `const uint16_t*` on the C++ side.
-output_ptr: usize,
-/// Number of UTF-16 code units in the output.
-output_len: usize,
+/// `reset` call.
+struct DecodeResult<'a> {
+/// UTF-16 code units decoded from the input, borrowing the
+/// decoder's reusable output buffer.
+output: &'a [u16],
 /// True if a fatal decoding error was encountered. Only meaningful
 /// when the caller requested fatal mode — in replacement mode errors
 /// are silently replaced with U+FFFD and this flag is not set.
@@ -61,10 +58,14 @@ mod ffi {
 
 /// Decode a chunk of bytes. The decoded UTF-16 output is stored in
 /// the decoder's internal buffer; the returned `DecodeResult`
-/// carries a pointer and length into that buffer. Set `flush` to
-/// true on the final chunk. When `fatal` is true and an error is
-/// encountered, `had_error` is set and the output may be incomplete.
-fn decode(decoder: &mut Decoder, input: &[u8], options: &DecodeOptions) -> DecodeResult;
+/// borrows that buffer. Set `flush` to true on the final chunk.
+/// When `fatal` is true and an error is encountered, `had_error`
+/// is set and the output may be incomplete.
+unsafe fn decode<'a>(
+decoder: &'a mut Decoder,
+input: &[u8],
+options: &DecodeOptions,
+) -> DecodeResult<'a>;
 
 /// Reset the decoder to its initial state (for explicit reset calls).
 fn reset(decoder: &mut Decoder);
@@ -109,11 +110,11 @@ pub fn new_decoder(encoding: ffi::Encoding) -> Box<Decoder> {
 })
 }
 
-pub fn decode(
-state: &mut Decoder,
+pub fn decode<'a>(
+state: &'a mut Decoder,
 input: &[u8],
 options: &ffi::DecodeOptions,
-) -> ffi::DecodeResult {
+) -> ffi::DecodeResult<'a> {
 // Lazy reset: reconstruct the inner decoder only when a previous flush
 // marked it as needed, avoiding the cost on one-shot decodes where the
 // decoder is never reused.
@@ -154,8 +155,7 @@ pub fn decode(
 state.inner = state.encoding.new_decoder_without_bom_handling();
 state.output.truncate(total_written);
 return ffi::DecodeResult {
-output_ptr: state.output.as_ptr() as usize,
-output_len: state.output.len(),
+output: &state.output,
 had_error: true,
 };
 }
@@ -188,8 +188,7 @@ pub fn decode(
 }
 
 ffi::DecodeResult {
-output_ptr: state.output.as_ptr() as usize,
-output_len: state.output.len(),
+output: &state.output,
 had_error: false,
 }
 }
@@ -198,4 +197,9 @@ pub fn reset(state: &mut Decoder) {
 state.inner = state.encoding.new_decoder_without_bom_handling();
 state.needs_reset = false;
 // Intentionally keep state.output — preserves the allocation for reuse.
+// The buffer can grow up to ~2× the largest input chunk (due to UTF-16
+// expansion and the doubling strategy in decode()) and stays at that high-
+// water mark. This is acceptable because the Decoder is owned by a JS
+// TextDecoder object and is GC'd with it, so the buffer lifetime is
+// bounded by the object's reachability.
 }

src/workerd/api/encoding-legacy.c++

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,8 @@ kj::Maybe<jsg::JsString> LegacyDecoder::decode(
 return kj::none;
 }
 
-// Read the decoded UTF-16 output directly from the Rust-owned buffer,
-// avoiding a Vec<u16> move across the CXX bridge.
-auto ptr = reinterpret_cast<const uint16_t*>(result.output_ptr);
-return js.str(kj::ArrayPtr<const uint16_t>(ptr, result.output_len));
+// Zero-copy view of the UTF-16 output slice from the Rust-owned buffer.
+return js.str(kj::from<kj_rs::Rust>(result.output));
 }
 
 } // namespace workerd::api

0 commit comments

Comments
 (0)