Streaming
Stream tokens and events as they are generated, reducing time-to-first-token (TTFT) and enabling real-time UI updates.
See the official Streaming documentation for event types and behavior.
Stream Helpers (recommended)
High-level wrapper with typed events and automatic accumulation — no manual chunk stitching.
//! Streaming chat completion example.
//!
//! Run with: `OPENAI_API_KEY=sk-... cargo run --example chat_stream`
use std::io::{self, Write};

use futures_util::StreamExt;
use openai_oxide::OpenAI;
use openai_oxide::types::chat::*;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let client = OpenAI::from_env()?;
let request = ChatCompletionRequest::new(
"gpt-4o-mini",
vec![
ChatCompletionMessageParam::System {
content: "You are a helpful assistant.".into(),
name: None,
},
ChatCompletionMessageParam::User {
content: UserContent::Text("Write a haiku about Rust programming.".into()),
name: None,
},
],
);
let mut stream = client.chat().completions().create_stream(request).await?;
while let Some(result) = stream.next().await {
match result {
Ok(chunk) => {
for choice in &chunk.choices {
if let Some(content) = &choice.delta.content {
print!("{content}");
}
if choice.finish_reason.is_some() {
println!();
}
}
}
Err(e) => {
eprintln!("\nStream error: {e}");
break;
}
}
}
Ok(())
}
Event Types
| Event | When | Fields |
|---|---|---|
| `Chunk` | Every SSE chunk | Raw `ChatCompletionChunk` |
| `ContentDelta` | New text fragment | `delta`, `snapshot` (accumulated) |
| `ContentDone` | Text complete | `content` (full text) |
| `ToolCallDelta` | Argument fragment | `index`, `name`, `arguments_delta`, `arguments_snapshot` |
| `ToolCallDone` | Tool call complete | `index`, `call_id`, `name`, `arguments` |
| `RefusalDelta` / `RefusalDone` | Model refuses | `delta` / `refusal` |
| `Done` | Stream finished | `finish_reason` |
Node.js (drop-in replacement)
Same syntax as the official `openai` package — iterate the stream with `for await...of`:
/**
* Drop-in replacement for official openai SDK demo.
* Change: `const OpenAI = require('openai')` → `const { OpenAI } = require('openai-oxide/compat')`
*/
// ── One-line change from official SDK ──
// const OpenAI = require('openai');
const { OpenAI } = require('../compat');
/**
 * Run one non-streaming and one streaming chat completion against the
 * compat client, printing streamed tokens as they arrive.
 */
async function main() {
  const client = new OpenAI();

  // Non-streaming: one request, one complete response body.
  console.log("----- standard request -----");
  const completion = await client.chat.completions.create({
    model: "gpt-5.4-mini",
    messages: [{ role: "user", content: "Say this is a test" }],
  });
  console.log(completion.choices[0].message.content);

  // Streaming: `stream: true` returns an async iterable of chunks.
  console.log("----- streaming request -----");
  const stream = await client.chat.completions.create({
    model: "gpt-5.4-mini",
    messages: [{ role: "user", content: "How do I list files in a directory using Node.js?" }],
    stream: true,
  });
  for await (const chunk of stream) {
    // Each chunk may carry a partial text delta; optional chaining guards
    // against chunks without choices (e.g. the final usage chunk).
    const content = chunk.choices?.[0]?.delta?.content;
    if (content) process.stdout.write(content);
  }
  console.log();
}

// Surface failures explicitly instead of dying with an unhandled
// promise rejection, and exit nonzero so scripts can detect the error.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
Python (drop-in replacement)
Same syntax as the official `openai` package — iterate the stream with `async for`:
#!/usr/bin/env python3
"""
Drop-in replacement for official openai SDK demo.
Change: `from openai import AsyncOpenAI` → `from openai_oxide.compat import AsyncOpenAI`
"""
import asyncio
# ── One-line change from official SDK ──
# from openai import AsyncOpenAI
from openai_oxide.compat import AsyncOpenAI
async def main():
    """Run one non-streaming and one streaming chat completion,
    printing streamed tokens as they arrive."""
    client = AsyncOpenAI()

    # Non-streaming: a single request that returns the full completion.
    print("----- standard request -----")
    completion = await client.chat.completions.create(
        model="gpt-5.4-mini",
        messages=[
            {
                "role": "user",
                "content": "Say this is a test",
            },
        ],
    )
    print(completion.choices[0].message.content)

    # Streaming: stream=True returns an async iterator of events.
    print("----- streaming request -----")
    stream = await client.chat.completions.create(
        model="gpt-5.4-mini",
        messages=[
            {
                "role": "user",
                "content": "How do I output all files in a directory using Python?",
            },
        ],
        stream=True,
    )
    async for event in stream:
        # The original if/elif branches both printed the same "delta" field;
        # a single check is equivalent. flush=True makes each token appear
        # immediately even when stdout is block-buffered (e.g. piped output).
        delta = event.get("delta")
        if delta:
            print(delta, end="", flush=True)
    print()


asyncio.run(main())
Responses API Streaming
Typed events for the Responses API:
#![allow(unused)]
fn main() {
use futures_util::StreamExt;
use openai_oxide::types::responses::{ResponseCreateRequest, ResponseStreamEvent};
let mut stream = client.responses()
.create_stream(ResponseCreateRequest::new("gpt-5.4-mini").input("Hi"))
.await?;
while let Some(Ok(event)) = stream.next().await {
match event {
ResponseStreamEvent::OutputTextDelta { delta, .. } => print!("{delta}"),
ResponseStreamEvent::ResponseCompleted { response } => {
println!("\nDone: {}", response.output_text());
}
_ => {}
}
}
}
Next Steps
- Function Calling — Stream with early tool-call parsing
- WebSocket Sessions — Even lower latency with persistent connections
- Structured Output — Type-safe responses