Streaming
Stream tokens and events as they are generated, reducing time-to-first-token (TTFT) and enabling real-time UI updates.
See the official Streaming documentation for event types and behavior.
Stream Helpers (recommended)
High-level wrapper with typed events and automatic accumulation — no manual chunk stitching.
//! Streaming chat completion example.
//!
//! Run with: `OPENAI_API_KEY=sk-... cargo run --example chat_stream`
use std::io::{self, Write};

use futures_util::StreamExt;
use openai_oxide::OpenAI;
use openai_oxide::types::chat::*;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = OpenAI::from_env()?;
let request = ChatCompletionRequest::new(
"gpt-4o-mini",
vec![
ChatCompletionMessageParam::System {
content: "You are a helpful assistant.".into(),
name: None,
},
ChatCompletionMessageParam::User {
content: UserContent::Text("Write a haiku about Rust programming.".into()),
name: None,
},
],
);
let mut stream = client.chat().completions().create_stream(request).await?;
while let Some(result) = stream.next().await {
match result {
Ok(chunk) => {
for choice in &chunk.choices {
if let Some(content) = &choice.delta.content {
print!("{content}");
}
if choice.finish_reason.is_some() {
println!();
}
}
}
Err(e) => {
eprintln!("\nStream error: {e}");
break;
}
}
}
Ok(())
}
Event Types
| Event | When | Fields |
|---|---|---|
| `Chunk` | Every SSE chunk | Raw `ChatCompletionChunk` |
| `ContentDelta` | New text fragment | `delta`, `snapshot` (accumulated) |
| `ContentDone` | Text complete | `content` (full text) |
| `ToolCallDelta` | Argument fragment | `index`, `name`, `arguments_delta`, `arguments_snapshot` |
| `ToolCallDone` | Tool call complete | `index`, `call_id`, `name`, `arguments` |
| `RefusalDelta` / `RefusalDone` | Model refuses | `delta` / `refusal` |
| `Done` | Stream finished | `finish_reason` |
Node.js (drop-in replacement)
Same syntax as the official `openai` package — iterate the stream with `for await...of`:
/**
* Drop-in replacement for official openai SDK demo.
* Change: `const OpenAI = require('openai')` → `const { OpenAI } = require('openai-oxide/compat')`
*/
// ── One-line change from official SDK ──
// const OpenAI = require('openai');
const { OpenAI } = require('../compat');
/**
 * Run one non-streaming and one streaming chat completion against the
 * compat client, printing streamed tokens as they arrive.
 */
async function main() {
  const client = new OpenAI();

  // Non-streaming: one request, one complete response body.
  console.log("----- standard request -----");
  const completion = await client.chat.completions.create({
    model: "gpt-5.4-mini",
    messages: [{ role: "user", content: "Say this is a test" }],
  });
  console.log(completion.choices[0].message.content);

  // Streaming: `stream: true` returns an async iterable of chunks.
  console.log("----- streaming request -----");
  const stream = await client.chat.completions.create({
    model: "gpt-5.4-mini",
    messages: [{ role: "user", content: "How do I list files in a directory using Node.js?" }],
    stream: true,
  });
  for await (const chunk of stream) {
    // Each chunk may carry a partial text delta; optional chaining guards
    // against chunks without choices (e.g. the final usage chunk).
    const content = chunk.choices?.[0]?.delta?.content;
    if (content) process.stdout.write(content);
  }
  console.log();
}

// Surface failures explicitly instead of dying with an unhandled
// promise rejection, and exit nonzero so scripts can detect the error.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
Python (drop-in replacement)
Same syntax as the official `openai` package — iterate the stream with `async for`:
#!/usr/bin/env python3
"""
Drop-in replacement for official openai SDK demo.
Change: `from openai import AsyncOpenAI` → `from openai_oxide.compat import AsyncOpenAI`
"""
import asyncio
# ── One-line change from official SDK ──
# from openai import AsyncOpenAI
from openai_oxide.compat import AsyncOpenAI
async def main():
    """Run one non-streaming and one streaming chat completion,
    printing streamed tokens as they arrive."""
    client = AsyncOpenAI()

    # Non-streaming: a single request that returns the full completion.
    print("----- standard request -----")
    completion = await client.chat.completions.create(
        model="gpt-5.4-mini",
        messages=[
            {
                "role": "user",
                "content": "Say this is a test",
            },
        ],
    )
    print(completion.choices[0].message.content)

    # Streaming: stream=True returns an async iterator of events.
    print("----- streaming request -----")
    stream = await client.chat.completions.create(
        model="gpt-5.4-mini",
        messages=[
            {
                "role": "user",
                "content": "How do I output all files in a directory using Python?",
            },
        ],
        stream=True,
    )
    async for event in stream:
        # The original if/elif branches both printed the same "delta" field;
        # a single check is equivalent. flush=True makes each token appear
        # immediately even when stdout is block-buffered (e.g. piped output).
        delta = event.get("delta")
        if delta:
            print(delta, end="", flush=True)
    print()


asyncio.run(main())
Responses API Streaming
Typed events for the Responses API:
#![allow(unused)]
fn main() {
use futures_util::StreamExt;
use openai_oxide::types::responses::{ResponseCreateRequest, ResponseStreamEvent};
let mut stream = client.responses()
.create_stream(ResponseCreateRequest::new("gpt-5.4-mini").input("Hi"))
.await?;
while let Some(Ok(event)) = stream.next().await {
match event {
ResponseStreamEvent::OutputTextDelta { delta, .. } => print!("{delta}"),
ResponseStreamEvent::ResponseCompleted { response } => {
println!("\nDone: {}", response.output_text());
}
_ => {}
}
}
}
Next Steps
- Function Calling — Stream with early tool-call parsing
- WebSocket Sessions — Even lower latency with persistent connections
- Structured Output — Type-safe responses