TanStack AI supports streaming responses for real-time chat experiences. Streaming allows you to display responses as they're generated, rather than waiting for the complete response.
When you use chat(), it returns an async iterable stream of chunks:
import { chat } from "@tanstack/ai";
import { openai } from "@tanstack/ai-openai";
const stream = chat({
adapter: openai(),
messages,
model: "gpt-4o",
});
// Stream contains chunks as they arrive
for await (const chunk of stream) {
console.log(chunk); // Process each chunk
}
On the server, convert the stream to an HTTP response using toStreamResponse:
import { chat, toStreamResponse } from "@tanstack/ai";
import { openai } from "@tanstack/ai-openai";
export async function POST(request: Request) {
const { messages } = await request.json();
const stream = chat({
adapter: openai(),
messages,
model: "gpt-4o",
});
// Convert to HTTP response with proper headers
return toStreamResponse(stream);
}
On the client, the useChat hook handles streaming automatically:
import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";
const { messages, sendMessage, isLoading } = useChat({
connection: fetchServerSentEvents("/api/chat"),
});
// Messages update in real-time as chunks arrive
messages.forEach((message) => {
// Message content updates incrementally
});
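As a minimal sketch of rendering these streaming messages in a component (the message fields used below, id and role, and how content is rendered are assumptions; check the UIMessage type for the exact shape):
import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";

// Sketch only: assumes each message exposes `id` and `role`; render the
// message content/parts according to the actual UIMessage type.
function ChatView() {
  const { messages, isLoading } = useChat({
    connection: fetchServerSentEvents("/api/chat"),
  });

  return (
    <div>
      {messages.map((message) => (
        <div key={message.id}>
          <strong>{message.role}</strong>
          {/* render the message's content/parts here */}
        </div>
      ))}
      {isLoading && <p>Streaming…</p>}
    </div>
  );
}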
Stream chunks carry different types of data. Thinking chunks, for example, represent the model's reasoning process and stream separately from the final response text:
for await (const chunk of stream) {
if (chunk.type === "thinking") {
console.log("Thinking:", chunk.content); // Accumulated thinking content
console.log("Delta:", chunk.delta); // Incremental thinking token
}
}
Thinking chunks are automatically converted to ThinkingPart in UIMessage objects. They are UI-only and excluded from messages sent back to the model.
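As an illustration, a UI could surface the reasoning separately from the answer text. This is a hedged sketch: the parts, type, and content fields are assumptions about the UIMessage/ThinkingPart shape, so consult the actual types for exact names:
// Sketch only: field names are assumed, not taken from the library's types.
function ThinkingSection({ message }: { message: any }) {
  const thinkingParts = (message.parts ?? []).filter(
    (part: any) => part.type === "thinking",
  );
  if (thinkingParts.length === 0) return null;

  return (
    // Shown to the user only; thinking content is never sent back to the model.
    <details>
      <summary>Model reasoning</summary>
      {thinkingParts.map((part: any, i: number) => (
        <pre key={i}>{part.content}</pre>
      ))}
    </details>
  );
}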
TanStack AI provides connection adapters for different streaming protocols. For Server-Sent Events, use fetchServerSentEvents:
import { useChat, fetchServerSentEvents } from "@tanstack/ai-react";
const { messages } = useChat({
connection: fetchServerSentEvents("/api/chat"),
});
To consume a raw HTTP stream instead, use fetchHttpStream:
import { useChat, fetchHttpStream } from "@tanstack/ai-react";
const { messages } = useChat({
connection: fetchHttpStream("/api/chat"),
});
For full control over the transport, stream accepts a custom connection function:
import { useChat, stream } from "@tanstack/ai-react";
const { messages } = useChat({
connection: stream(async (messages, data, signal) => {
// Custom streaming implementation
const response = await fetch("/api/chat", {
method: "POST",
body: JSON.stringify({ messages, ...data }),
signal,
});
// Return async iterable
return processStream(response);
}),
});
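The processStream helper above is not part of TanStack AI; it stands in for whatever parsing your endpoint's wire format requires. A rough sketch, assuming the server emits newline-delimited JSON chunks (adjust the parsing to match what your endpoint actually returns):
// Hypothetical helper: reads the response body and yields one parsed chunk
// per newline-delimited JSON line. The wire format here is an assumption.
async function* processStream(response: Response) {
  const reader = response.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // Emit every complete line; keep the trailing partial line in the buffer.
    const lines = buffer.split("\n");
    buffer = lines.pop() ?? "";
    for (const line of lines) {
      if (line.trim()) yield JSON.parse(line);
    }
  }
  if (buffer.trim()) yield JSON.parse(buffer);
}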
You can monitor stream progress with the onChunk and onFinish callbacks:
const { messages } = useChat({
connection: fetchServerSentEvents("/api/chat"),
onChunk: (chunk) => {
console.log("Received chunk:", chunk);
},
onFinish: (message) => {
console.log("Stream finished:", message);
},
});
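As one possible use, onFinish is a convenient place to persist the completed message. The /api/conversations endpoint below is hypothetical and not part of TanStack AI:
const { messages, sendMessage } = useChat({
  connection: fetchServerSentEvents("/api/chat"),
  onFinish: async (message) => {
    // Hypothetical persistence endpoint; swap in your own storage.
    await fetch("/api/conversations", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ message }),
    });
  },
});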
To cancel an in-progress stream, call stop:
const { stop } = useChat({
connection: fetchServerSentEvents("/api/chat"),
});
// Cancel the current stream
stop();
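For example, stop can be wired to a cancel control alongside isLoading. A minimal sketch:
const { messages, isLoading, stop } = useChat({
  connection: fetchServerSentEvents("/api/chat"),
});

// Only show the cancel control while a response is streaming.
{isLoading && <button onClick={() => stop()}>Stop generating</button>}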
