@@ -84,6 +84,7 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
8484 }
8585
8686 // In gRPC, the backend is expected to reply with a single token if streaming is not supported
87+ var capturedPredictOpts * proto.PredictOptions
8788 fn := func () (LLMResponse , error ) {
8889 opts := gRPCPredictOpts (* c , loader .ModelPath )
8990 // Merge request-level metadata (overrides config defaults)
@@ -111,6 +112,7 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
111112 opts .LogitBias = string (logitBiasJSON )
112113 }
113114 }
115+ capturedPredictOpts = opts
114116
115117 tokenUsage := TokenUsage {}
116118
@@ -245,29 +247,19 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
245247 trace .InitBackendTracingIfEnabled (o .TracingMaxItems )
246248
247249 traceData := map [string ]any {
248- "prompt" : s ,
249- "use_tokenizer_template" : c .TemplateConfig .UseTokenizerTemplate ,
250- "chat_template" : c .TemplateConfig .Chat ,
251- "function_template" : c .TemplateConfig .Functions ,
252- "grammar" : c .Grammar ,
253- "stop_words" : c .StopWords ,
254- "streaming" : tokenCallback != nil ,
255- "images_count" : len (images ),
256- "videos_count" : len (videos ),
257- "audios_count" : len (audios ),
250+ "chat_template" : c .TemplateConfig .Chat ,
251+ "function_template" : c .TemplateConfig .Functions ,
252+ "streaming" : tokenCallback != nil ,
253+ "images_count" : len (images ),
254+ "videos_count" : len (videos ),
255+ "audios_count" : len (audios ),
258256 }
259257
260258 if len (messages ) > 0 {
261259 if msgJSON , err := json .Marshal (messages ); err == nil {
262260 traceData ["messages" ] = string (msgJSON )
263261 }
264262 }
265- if tools != "" {
266- traceData ["tools" ] = tools
267- }
268- if toolChoice != "" {
269- traceData ["tool_choice" ] = toolChoice
270- }
271263 if reasoningJSON , err := json .Marshal (c .ReasoningConfig ); err == nil {
272264 traceData ["reasoning_config" ] = string (reasoningJSON )
273265 }
@@ -277,15 +269,6 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
277269 "mixed_mode" : c .FunctionsConfig .GrammarConfig .MixedMode ,
278270 "xml_format_preset" : c .FunctionsConfig .XMLFormatPreset ,
279271 }
280- if c .Temperature != nil {
281- traceData ["temperature" ] = * c .Temperature
282- }
283- if c .TopP != nil {
284- traceData ["top_p" ] = * c .TopP
285- }
286- if c .Maxtokens != nil {
287- traceData ["max_tokens" ] = * c .Maxtokens
288- }
289272
290273 startTime := time .Now ()
291274 originalFn := fn
@@ -299,6 +282,42 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
299282 "completion" : resp .Usage .Completion ,
300283 }
301284
285+ if len (resp .ChatDeltas ) > 0 {
286+ chatDeltasInfo := map [string ]any {
287+ "total_deltas" : len (resp .ChatDeltas ),
288+ }
289+ var contentParts , reasoningParts []string
290+ toolCallCount := 0
291+ for _ , d := range resp .ChatDeltas {
292+ if d .Content != "" {
293+ contentParts = append (contentParts , d .Content )
294+ }
295+ if d .ReasoningContent != "" {
296+ reasoningParts = append (reasoningParts , d .ReasoningContent )
297+ }
298+ toolCallCount += len (d .ToolCalls )
299+ }
300+ if len (contentParts ) > 0 {
301+ chatDeltasInfo ["content" ] = strings .Join (contentParts , "" )
302+ }
303+ if len (reasoningParts ) > 0 {
304+ chatDeltasInfo ["reasoning_content" ] = strings .Join (reasoningParts , "" )
305+ }
306+ if toolCallCount > 0 {
307+ chatDeltasInfo ["tool_call_count" ] = toolCallCount
308+ }
309+ traceData ["chat_deltas" ] = chatDeltasInfo
310+ }
311+
312+ if capturedPredictOpts != nil {
313+ if optsJSON , err := json .Marshal (capturedPredictOpts ); err == nil {
314+ var optsMap map [string ]any
315+ if err := json .Unmarshal (optsJSON , & optsMap ); err == nil {
316+ traceData ["predict_options" ] = optsMap
317+ }
318+ }
319+ }
320+
302321 errStr := ""
303322 if err != nil {
304323 errStr = err .Error ()
0 commit comments