Skip to content

Commit 414b4d5

Browse files
committed
Add sessionAffinity header support
Add a `sessionAffinity` option across the Workers AI adapters/providers. Requests that share the same key are routed to the same backend replica via the `x-session-affinity` header, for prefix-cache optimization.

Implementation details:
- Extend `WorkersAiAdapterConfig` with an optional `sessionAffinity` string.
- Propagate `sessionAffinity` as the `x-session-affinity` header in every mode: to `binding.run()` via the `extraHeaders` option of `createWorkersAiBindingFetch`, to REST requests via `defaultHeaders`, and to gateway mode via the `createGatewayFetch` call.
- Merge `sessionAffinity` with user-provided `extraHeaders` in the WorkersAI provider so that both sets of headers are forwarded together.

Other changes:
- Add and update tests covering binding.fetch, adapter behavior, and REST/binding header merging.
- Update the README docs for tanstack-ai and workers-ai-provider to document `sessionAffinity` usage.
- Add changeset files to trigger a patch release for the relevant packages.
- Apply minor formatting updates to demos.json.
1 parent b1c742b commit 414b4d5

File tree

12 files changed

+325
-131
lines changed

12 files changed

+325
-131
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
"workers-ai-provider": minor
2+
"workers-ai-provider": patch
33
---
44

55
Add `sessionAffinity` setting to send `x-session-affinity` header for prefix-cache optimization. Also forward `extraHeaders` in the REST API path instead of discarding them.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@cloudflare/tanstack-ai": patch
3+
---
4+
5+
Add `sessionAffinity` option to `WorkersAiAdapterConfig` for prefix-cache optimization. Routes requests with the same key to the same backend replica via the `x-session-affinity` header. Supported across binding, REST, and gateway modes.

demos.json

Lines changed: 111 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,112 @@
11
{
2-
"demos": {
3-
"./demos/agent-scheduler": {
4-
"package_json_hash": "2fe8785345d56ff37e675baaa06380c1eed736ba"
5-
},
6-
"./demos/agent-task-manager": {
7-
"package_json_hash": "f2245bb30e95d0785aa195635a0928fba5b621ae"
8-
},
9-
"./demos/agent-task-manager-human-in-the-loop": {
10-
"package_json_hash": "d8856a52bd0bf9641b8d1ff98e06de650aece34a"
11-
},
12-
"./demos/evaluator-optimiser": {
13-
"package_json_hash": "8c4e9a71c91d806dcbef586b50a594e650d8f090"
14-
},
15-
"./demos/image-generation": {
16-
"package_json_hash": "697d55539ad024faa349faa3dcd3bcbdfacf37bc"
17-
},
18-
"./demos/mcp-client": {
19-
"package_json_hash": "5129c9edfcdd03c8625615c85329d85138fb9773"
20-
},
21-
"./demos/mcp-server-bearer-auth": {
22-
"package_json_hash": "8703a8f8992a06377cce9a139ce6709450b51b5c"
23-
},
24-
"./demos/mcp-slack-oauth": {
25-
"package_json_hash": "3134658fb11397626329bc344eba48fd57b21d46"
26-
},
27-
"./demos/mcp-stytch-b2b-okr-manager": {
28-
"package_json_hash": "48232b81779a5f5fb0253842b4243c7dad032c0c"
29-
},
30-
"./demos/mcp-stytch-consumer-todo-list": {
31-
"package_json_hash": "f53fe23dcebec62f51f9a6e332d2c192b8598cf6"
32-
},
33-
"./demos/model-scraper": {
34-
"package_json_hash": "5a20ad46b257699c313bdd7c0b520701d739ed12"
35-
},
36-
"./demos/orchestrator-workers": {
37-
"package_json_hash": "e159d1ce03c17bf13239ee4ac76c0290a210bc38"
38-
},
39-
"./demos/parallelisation": {
40-
"package_json_hash": "6dbc55c3277b3ea634776821e60642b3dd03d8c0"
41-
},
42-
"./demos/prompt-chaining": {
43-
"package_json_hash": "510159b05545a2d7f9c8cb240def56649cd25989"
44-
},
45-
"./demos/remote-mcp-authkit": {
46-
"package_json_hash": "d3a0122c45d27140db96df6859e191aa7d2f8ac1"
47-
},
48-
"./demos/remote-mcp-github-oauth": {
49-
"package_json_hash": "c59a2ecc4937d54c658383c3d7fe95e7c123f5c1"
50-
},
51-
"./demos/remote-mcp-server": {
52-
"package_json_hash": "6240672fd54010c3b03a8af553b420306e11bc78"
53-
},
54-
"./demos/routing": {
55-
"package_json_hash": "5f547b98f4e9a6167a2913e3a6c61681312986dd"
56-
},
57-
"./demos/structured-output": {
58-
"package_json_hash": "a66aacd49c57e74c0937bf4bea0986168086debb"
59-
},
60-
"./demos/structured-output-node": {
61-
"package_json_hash": "f64cc27508f9dda6fbb3bf4192c031dcc671e64a"
62-
},
63-
"./demos/text-generation": {
64-
"package_json_hash": "d52767521e285b05c3235eaf2c8cc0e47fdbf90d"
65-
},
66-
"./demos/text-generation-stream": {
67-
"package_json_hash": "f8272f5b1f5f1c83c53395dfc76646cab18a32b7"
68-
},
69-
"./demos/tool-calling": {
70-
"package_json_hash": "3a0b1d91022d706b96e7b429c1349116ba9373b5"
71-
},
72-
"./demos/tool-calling-stream": {
73-
"package_json_hash": "7c92250cda46aaac7eb6aeea0255828781c4abcb"
74-
},
75-
"./demos/tool-calling-stream-traditional": {
76-
"package_json_hash": "c610c334d5f53a6e399bddddf68098ca0dec96d7"
77-
},
78-
"./demos/ui-worker": {
79-
"package_json_hash": "831702fff4771ce9ce7d93afe6824ec6fa316125"
80-
},
81-
"./demos/remote-mcp-cf-access": {
82-
"package_json_hash": "1a09d449c88cfe3b989f352d18813385578b98ca"
83-
},
84-
"./demos/remote-mcp-authless": {
85-
"package_json_hash": "ba9953ce57a26cb271144e67609ed98fd1c1110e"
86-
},
87-
"./demos/python-workers-mcp": {
88-
"package_json_hash": "0e710d7b27bb34edba396dc2b3365db230c076cb"
89-
},
90-
"./demos/vision": {
91-
"package_json_hash": "e53450d50753f0574995feef3b2f845045fc3dc3"
92-
},
93-
"./demos/remote-mcp-google-oauth": {
94-
"package_json_hash": "21bdab2ebbbe336c5fe6fb032fde804373f1b489"
95-
},
96-
"./demos/remote-mcp-logto": {
97-
"package_json_hash": "a98a0cb367641ff86d89a7127f5e2551d2a1532f"
98-
},
99-
"./demos/remote-mcp-server-descope-auth": {
100-
"package_json_hash": "c5de845803aae734fa60185200d4bfa2e1d0fb23"
101-
},
102-
"./demos/remote-mcp-server-autorag": {
103-
"package_json_hash": "2b4e9b35192362b3be2743370469ce3a627a72b0"
104-
},
105-
"./demos/use-mcp-inspector": {
106-
"package_json_hash": "d1d084f1aa9a752ead5250e0a070f97a9114dcea"
107-
},
108-
"./demos/hello-world": {
109-
"package_json_hash": "ab24a12893c001fe3416fadea2a8bf5e7e68392e"
110-
}
111-
}
112-
}
2+
"demos": {
3+
"./demos/agent-scheduler": {
4+
"package_json_hash": "2fe8785345d56ff37e675baaa06380c1eed736ba"
5+
},
6+
"./demos/agent-task-manager": {
7+
"package_json_hash": "f2245bb30e95d0785aa195635a0928fba5b621ae"
8+
},
9+
"./demos/agent-task-manager-human-in-the-loop": {
10+
"package_json_hash": "d8856a52bd0bf9641b8d1ff98e06de650aece34a"
11+
},
12+
"./demos/evaluator-optimiser": {
13+
"package_json_hash": "8c4e9a71c91d806dcbef586b50a594e650d8f090"
14+
},
15+
"./demos/image-generation": {
16+
"package_json_hash": "697d55539ad024faa349faa3dcd3bcbdfacf37bc"
17+
},
18+
"./demos/mcp-client": {
19+
"package_json_hash": "5129c9edfcdd03c8625615c85329d85138fb9773"
20+
},
21+
"./demos/mcp-server-bearer-auth": {
22+
"package_json_hash": "8703a8f8992a06377cce9a139ce6709450b51b5c"
23+
},
24+
"./demos/mcp-slack-oauth": {
25+
"package_json_hash": "3134658fb11397626329bc344eba48fd57b21d46"
26+
},
27+
"./demos/mcp-stytch-b2b-okr-manager": {
28+
"package_json_hash": "48232b81779a5f5fb0253842b4243c7dad032c0c"
29+
},
30+
"./demos/mcp-stytch-consumer-todo-list": {
31+
"package_json_hash": "f53fe23dcebec62f51f9a6e332d2c192b8598cf6"
32+
},
33+
"./demos/model-scraper": {
34+
"package_json_hash": "5a20ad46b257699c313bdd7c0b520701d739ed12"
35+
},
36+
"./demos/orchestrator-workers": {
37+
"package_json_hash": "e159d1ce03c17bf13239ee4ac76c0290a210bc38"
38+
},
39+
"./demos/parallelisation": {
40+
"package_json_hash": "6dbc55c3277b3ea634776821e60642b3dd03d8c0"
41+
},
42+
"./demos/prompt-chaining": {
43+
"package_json_hash": "510159b05545a2d7f9c8cb240def56649cd25989"
44+
},
45+
"./demos/remote-mcp-authkit": {
46+
"package_json_hash": "d3a0122c45d27140db96df6859e191aa7d2f8ac1"
47+
},
48+
"./demos/remote-mcp-github-oauth": {
49+
"package_json_hash": "c59a2ecc4937d54c658383c3d7fe95e7c123f5c1"
50+
},
51+
"./demos/remote-mcp-server": {
52+
"package_json_hash": "6240672fd54010c3b03a8af553b420306e11bc78"
53+
},
54+
"./demos/routing": {
55+
"package_json_hash": "5f547b98f4e9a6167a2913e3a6c61681312986dd"
56+
},
57+
"./demos/structured-output": {
58+
"package_json_hash": "a66aacd49c57e74c0937bf4bea0986168086debb"
59+
},
60+
"./demos/structured-output-node": {
61+
"package_json_hash": "f64cc27508f9dda6fbb3bf4192c031dcc671e64a"
62+
},
63+
"./demos/text-generation": {
64+
"package_json_hash": "d52767521e285b05c3235eaf2c8cc0e47fdbf90d"
65+
},
66+
"./demos/text-generation-stream": {
67+
"package_json_hash": "f8272f5b1f5f1c83c53395dfc76646cab18a32b7"
68+
},
69+
"./demos/tool-calling": {
70+
"package_json_hash": "3a0b1d91022d706b96e7b429c1349116ba9373b5"
71+
},
72+
"./demos/tool-calling-stream": {
73+
"package_json_hash": "7c92250cda46aaac7eb6aeea0255828781c4abcb"
74+
},
75+
"./demos/tool-calling-stream-traditional": {
76+
"package_json_hash": "c610c334d5f53a6e399bddddf68098ca0dec96d7"
77+
},
78+
"./demos/ui-worker": {
79+
"package_json_hash": "831702fff4771ce9ce7d93afe6824ec6fa316125"
80+
},
81+
"./demos/remote-mcp-cf-access": {
82+
"package_json_hash": "1a09d449c88cfe3b989f352d18813385578b98ca"
83+
},
84+
"./demos/remote-mcp-authless": {
85+
"package_json_hash": "ba9953ce57a26cb271144e67609ed98fd1c1110e"
86+
},
87+
"./demos/python-workers-mcp": {
88+
"package_json_hash": "0e710d7b27bb34edba396dc2b3365db230c076cb"
89+
},
90+
"./demos/vision": {
91+
"package_json_hash": "e53450d50753f0574995feef3b2f845045fc3dc3"
92+
},
93+
"./demos/remote-mcp-google-oauth": {
94+
"package_json_hash": "21bdab2ebbbe336c5fe6fb032fde804373f1b489"
95+
},
96+
"./demos/remote-mcp-logto": {
97+
"package_json_hash": "a98a0cb367641ff86d89a7127f5e2551d2a1532f"
98+
},
99+
"./demos/remote-mcp-server-descope-auth": {
100+
"package_json_hash": "c5de845803aae734fa60185200d4bfa2e1d0fb23"
101+
},
102+
"./demos/remote-mcp-server-autorag": {
103+
"package_json_hash": "2b4e9b35192362b3be2743370469ce3a627a72b0"
104+
},
105+
"./demos/use-mcp-inspector": {
106+
"package_json_hash": "d1d084f1aa9a752ead5250e0a070f97a9114dcea"
107+
},
108+
"./demos/hello-world": {
109+
"package_json_hash": "ab24a12893c001fe3416fadea2a8bf5e7e68392e"
110+
}
111+
}
112+
}

packages/tanstack-ai/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,15 @@ Workers AI supports four configuration modes:
275275

276276
Third-party providers (OpenAI, Anthropic, Gemini, Grok, OpenRouter) only support the gateway modes.
277277

278+
All Workers AI config modes also accept `sessionAffinity` to route requests with the same key to the same backend replica for prefix-cache optimization:
279+
280+
```typescript
281+
const adapter = createWorkersAiChat("@cf/meta/llama-3.3-70b-instruct-fp8-fast", {
282+
binding: env.AI,
283+
sessionAffinity: "my-unique-session-id",
284+
});
285+
```
286+
278287
## Links
279288

280289
- [TanStack AI Documentation](https://tanstack.com/ai)

packages/tanstack-ai/src/adapters/workers-ai.ts

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,18 @@ export type WorkersAiTextModel =
3333
function buildWorkersAiClient(config: WorkersAiAdapterConfig): OpenAI {
3434
validateWorkersAiConfig(config);
3535

36+
const sessionHeaders: Record<string, string> | undefined = config.sessionAffinity
37+
? { "x-session-affinity": config.sessionAffinity }
38+
: undefined;
39+
3640
if (isDirectBindingConfig(config)) {
3741
// Plain binding mode: shim translates OpenAI fetch calls to env.AI.run()
3842
return new OpenAI({
3943
apiKey: "unused",
40-
fetch: createWorkersAiBindingFetch(config.binding),
44+
fetch: createWorkersAiBindingFetch(
45+
config.binding,
46+
sessionHeaders ? { extraHeaders: sessionHeaders } : undefined,
47+
),
4148
});
4249
}
4350

@@ -46,13 +53,14 @@ function buildWorkersAiClient(config: WorkersAiAdapterConfig): OpenAI {
4653
return new OpenAI({
4754
baseURL: `https://api.cloudflare.com/client/v4/accounts/${config.accountId}/ai/v1`,
4855
apiKey: config.apiKey,
56+
defaultHeaders: sessionHeaders,
4957
});
5058
}
5159

5260
// Gateway mode (existing): use createGatewayFetch
5361
const gatewayConfig = config as AiGatewayAdapterConfig;
5462
return new OpenAI({
55-
fetch: createGatewayFetch("workers-ai", gatewayConfig),
63+
fetch: createGatewayFetch("workers-ai", gatewayConfig, sessionHeaders),
5664
apiKey: gatewayConfig.apiKey ?? "unused",
5765
});
5866
}
@@ -377,11 +385,8 @@ export class WorkersAiTextAdapter<TModel extends WorkersAiTextModel> extends Bas
377385

378386
// Reasoning content (used by models like QwQ, DeepSeek R1, Kimi K2.5)
379387
// The OpenAI SDK doesn't type this field, but models send it as an extension.
380-
const reasoningContent = ((delta as Record<string, unknown>)
381-
.reasoning_content ??
382-
(delta as Record<string, unknown>).reasoning) as
383-
| string
384-
| undefined;
388+
const reasoningContent = ((delta as Record<string, unknown>).reasoning_content ??
389+
(delta as Record<string, unknown>).reasoning) as string | undefined;
385390
if (reasoningContent) {
386391
// RUN_STARTED is already guaranteed by the guard above
387392
if (!hasEmittedStepStarted) {

packages/tanstack-ai/src/utils/create-fetcher.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,17 @@ export interface WorkersAiDirectCredentialsConfig {
105105
* upstream provider), distinct from `cfApiKey` (used in the `cf-aig-authorization`
106106
* header for authenticated gateways).
107107
*/
108-
export type WorkersAiAdapterConfig =
108+
export type WorkersAiAdapterConfig = (
109109
| WorkersAiDirectBindingConfig
110110
| WorkersAiDirectCredentialsConfig
111-
| (AiGatewayAdapterConfig & { apiKey?: string });
111+
| (AiGatewayAdapterConfig & { apiKey?: string })
112+
) & {
113+
/**
114+
* Session affinity key for prefix-cache optimization.
115+
* Routes requests with the same key to the same backend replica; the key is sent as the `x-session-affinity` request header.
116+
*/
117+
sessionAffinity?: string;
118+
};
112119

113120
// ---------------------------------------------------------------------------
114121
// Config detection helpers
@@ -330,7 +337,10 @@ function sanitizeToolCallId(id: string): string {
330337
* request parameters are extracted from the JSON body, matching Workers AI's
331338
* `binding.run(model, inputs)` calling convention.
332339
*/
333-
export function createWorkersAiBindingFetch(binding: WorkersAiBinding): typeof fetch {
340+
export function createWorkersAiBindingFetch(
341+
binding: WorkersAiBinding,
342+
options?: { extraHeaders?: Record<string, string> },
343+
): typeof fetch {
334344
return async (_input, init) => {
335345
if (!init?.body) {
336346
return new Response("No body", { status: 400 });
@@ -359,7 +369,11 @@ export function createWorkersAiBindingFetch(binding: WorkersAiBinding): typeof f
359369
if (body.response_format) inputs.response_format = body.response_format;
360370
if (stream) inputs.stream = true;
361371

362-
const result = await binding.run(model, inputs);
372+
const result = await binding.run(
373+
model,
374+
inputs,
375+
options?.extraHeaders ? { extraHeaders: options.extraHeaders } : undefined,
376+
);
363377

364378
if (stream && result instanceof ReadableStream) {
365379
// Workers AI returns an SSE stream with `data: {"response":"chunk"}` format.

packages/tanstack-ai/test/binding-fetch.test.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,44 @@ describe("createWorkersAiBindingFetch", () => {
545545
expect(json.choices[0]!.finish_reason).toBe("stop");
546546
});
547547

548+
it("should forward extraHeaders to binding.run() when configured", async () => {
549+
const binding = mockBinding(vi.fn().mockResolvedValue({ response: "ok" }));
550+
551+
const fetcher = createWorkersAiBindingFetch(binding, {
552+
extraHeaders: { "x-session-affinity": "session-123" },
553+
});
554+
555+
await fetcher("https://api.openai.com/v1/chat/completions", {
556+
method: "POST",
557+
body: JSON.stringify({
558+
model: "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
559+
messages: [{ role: "user", content: "Hi" }],
560+
}),
561+
});
562+
563+
expect(binding.run).toHaveBeenCalledOnce();
564+
const [, , options] = binding.run.mock.calls[0]!;
565+
expect(options).toEqual({ extraHeaders: { "x-session-affinity": "session-123" } });
566+
});
567+
568+
it("should not pass extraHeaders to binding.run() when not configured", async () => {
569+
const binding = mockBinding(vi.fn().mockResolvedValue({ response: "ok" }));
570+
571+
const fetcher = createWorkersAiBindingFetch(binding);
572+
573+
await fetcher("https://api.openai.com/v1/chat/completions", {
574+
method: "POST",
575+
body: JSON.stringify({
576+
model: "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
577+
messages: [{ role: "user", content: "Hi" }],
578+
}),
579+
});
580+
581+
expect(binding.run).toHaveBeenCalledOnce();
582+
const [, , options] = binding.run.mock.calls[0]!;
583+
expect(options).toBeUndefined();
584+
});
585+
548586
it("should pass response_format to binding for structured output", async () => {
549587
const binding = mockBinding(vi.fn().mockResolvedValue({ response: '{"name":"test"}' }));
550588

0 commit comments

Comments
 (0)