
Real-Time Streaming

Translation at the speed of conversation. Sub-200ms latency with WebSocket streaming. Perfect for live meetings, customer support, and real-time applications.


Performance

Metric                 | Value         | Comparison
First Token            | <100 ms       | 5x faster than batch
End-to-End             | <200 ms       | Near-instantaneous
Throughput             | 10,000 req/s  | Enterprise scale
Concurrent connections | Unlimited     | WebSocket multiplexing
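
To see how these numbers look from your own network, you can time a streaming request yourself. This is a minimal sketch, assuming the Python SDK's translate_stream iterator shown later on this page; only standard-library timing is added.

import time

from pauhu import Pauhu

client = Pauhu()

# Measure time-to-first-token and end-to-end latency for one streaming request
start = time.perf_counter()
first_token_at = None
tokens = []

for token in client.translate_stream(
    text="The meeting will begin shortly.",
    target="fi"
):
    if first_token_at is None:
        first_token_at = time.perf_counter()
    tokens.append(token)

end = time.perf_counter()

print(f"First token: {(first_token_at - start) * 1000:.0f} ms")
print(f"End-to-end:  {(end - start) * 1000:.0f} ms")
print("Translation:", "".join(tokens))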

WebSocket API

Connection

const ws = new WebSocket('wss://api.pauhu.ai/v1/stream');

ws.onopen = () => {
  ws.send(JSON.stringify({
    type: 'auth',
    api_key: 'pk_...'
  }));
};

Streaming Translation

// Send text for translation
ws.send(JSON.stringify({
  type: 'translate',
  text: 'The meeting will begin shortly.',
  source: 'en',
  target: 'fi',
  stream: true
}));

// Receive tokens as they're generated
ws.onmessage = (event) => {
  const data = JSON.parse(event.data);

  if (data.type === 'token') {
    process.stdout.write(data.token);
    // "Kokous" -> "Kokous alkaa" -> "Kokous alkaa pian."
  }

  if (data.type === 'complete') {
    console.log('\nDone:', data.translation);
    console.log('Latency:', data.latency_ms, 'ms');
  }
};
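
The same WebSocket flow can be driven from Python if you are not working in JavaScript. This is a minimal sketch, assuming the message types shown above (auth, translate, token, complete); the third-party websockets package is an assumption here and is not part of the Pauhu SDK.

import asyncio
import json

import websockets  # third-party package, not part of the Pauhu SDK

async def stream_translation():
    async with websockets.connect("wss://api.pauhu.ai/v1/stream") as ws:
        # Authenticate first, as in the JavaScript example above
        await ws.send(json.dumps({"type": "auth", "api_key": "pk_..."}))

        # Request a streaming translation
        await ws.send(json.dumps({
            "type": "translate",
            "text": "The meeting will begin shortly.",
            "source": "en",
            "target": "fi",
            "stream": True
        }))

        # Read token events until the completion event arrives
        async for message in ws:
            data = json.loads(message)
            if data["type"] == "token":
                print(data["token"], end="", flush=True)
            elif data["type"] == "complete":
                print("\nDone:", data["translation"])
                print("Latency:", data["latency_ms"], "ms")
                break

asyncio.run(stream_translation())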

Python SDK

Streaming Iterator

from pauhu import Pauhu

client = Pauhu()

# Stream tokens as they're generated
for token in client.translate_stream(
    text="Welcome to the European Parliament.",
    target="fi"
):
    print(token, end="", flush=True)

# Output: "Tervetuloa Euroopan parlamenttiin."

Async Streaming

import asyncio
from pauhu import AsyncPauhu

async def translate_realtime():
    client = AsyncPauhu()

    async for token in client.translate_stream(
        text="This is a live translation.",
        target="fi"
    ):
        print(token, end="", flush=True)

asyncio.run(translate_realtime())

Live Meeting Translation

import asyncio

from pauhu import AsyncPauhu
from pauhu.realtime import AudioStream

client = AsyncPauhu()

# Real-time audio translation: transcribe speech, then stream the translation
async def translate_meeting():
    audio = AudioStream(device="microphone")

    async for segment in audio.transcribe():
        # Each segment carries the transcribed text, speaker, and detected language
        print(f"[{segment.speaker}]: {segment.text}")

        # Translate in real time, token by token
        async for token in client.translate_stream(
            text=segment.text,
            source=segment.language,
            target="fi"
        ):
            print(token, end="", flush=True)

        print()  # Newline after each segment

asyncio.run(translate_meeting())

Chat Integration

Customer Support Widget

import { PauhuStream } from '@pauhu/js';

const pauhu = new PauhuStream({ apiKey: 'pk_...' });

// Translate incoming customer message
async function handleCustomerMessage(message) {
  const translation = document.getElementById('translation');
  translation.textContent = '';

  for await (const token of pauhu.translate({
    text: message,
    target: 'en',
    stream: true
  })) {
    translation.textContent += token;
  }
}

Bidirectional Translation

// Agent sees customer message in English
// Customer sees agent response in their language

async function bidirectionalChat(message, fromLang, toLang) {
  const translated = [];

  for await (const token of pauhu.translate({
    text: message,
    source: fromLang,
    target: toLang,
    stream: true
  })) {
    translated.push(token);
    updateUI(translated.join(''));
  }

  return translated.join('');
}

Latency Optimization

Edge Locations

Region       | Location  | Latency
EU-North     | Helsinki  | <50 ms
EU-Central   | Frankfurt | <80 ms
EU-West      | Dublin    | <100 ms
US-East      | Virginia  | <120 ms
Asia-Pacific | Singapore | <150 ms
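
To check which latency band you fall into, you can time the WebSocket handshake from your own network. This is a minimal sketch using the documented endpoint and the third-party websockets package (an assumption, not part of the Pauhu SDK); it measures network round trip only, not translation latency.

import asyncio
import time

import websockets  # third-party package, used only to time the handshake

async def measure_handshake():
    # Time the WebSocket handshake to estimate latency to your nearest edge
    start = time.perf_counter()
    async with websockets.connect("wss://api.pauhu.ai/v1/stream"):
        elapsed_ms = (time.perf_counter() - start) * 1000
    print(f"Handshake completed in {elapsed_ms:.0f} ms")

asyncio.run(measure_handshake())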

Connection Pooling

from pauhu import Pauhu

# Reuse connection for multiple translations
client = Pauhu(
    connection_pool=True,
    pool_size=10
)

# First request establishes the connection
result1 = "".join(client.translate_stream(text="Hello", target="fi"))

# Subsequent requests reuse the pooled connection (<50ms overhead)
result2 = "".join(client.translate_stream(text="World", target="fi"))

Event-Driven Architecture

from pauhu import Pauhu
from pauhu.events import TranslationEventHandler

class MyHandler(TranslationEventHandler):
    def on_start(self, request_id):
        print(f"Started: {request_id}")

    def on_token(self, token, index):
        print(f"Token {index}: {token}")

    def on_complete(self, translation, latency_ms):
        print(f"Complete in {latency_ms}ms: {translation}")

    def on_error(self, error):
        print(f"Error: {error}")

client = Pauhu(event_handler=MyHandler())
client.translate_stream(text="Test", target="fi")

Quality vs Speed

from pauhu import Pauhu

client = Pauhu()

# Fast mode (lower latency, slightly lower quality)
fast = client.translate_stream(
    text="Quick translation needed",
    target="fi",
    quality="fast"  # <100ms
)

# Balanced mode (default)
balanced = client.translate_stream(
    text="Standard translation",
    target="fi",
    quality="balanced"  # <200ms
)

# Quality mode (higher latency, best quality)
quality = client.translate_stream(
    text="Important document",
    target="fi",
    quality="quality"  # <500ms
)

Getting Started

from pauhu import Pauhu

client = Pauhu()

# Stream your first translation
for token in client.translate_stream(
    text="Welcome to real-time translation.",
    target="fi"
):
    print(token, end="", flush=True)

# Output: "Tervetuloa reaaliaikaiseen käännökseen."