Skip to content

File Hubs

Connect once, translate forever. Pauhu integrates with your existing file storage. Drop an English document into a watched source folder, and the Finnish version appears in the target folder minutes later. Automatic, seamless, continuous.


Supported Integrations

Platform Read Write Watch Webhooks
SharePoint
OneDrive
Google Drive
Dropbox
Box
AWS S3
Azure Blob
Google Cloud Storage
SFTP
SMB/CIFS

Quick Setup

SharePoint Online

from pauhu import Pauhu
from pauhu.hubs import SharePoint

client = Pauhu()

# Connect to SharePoint
hub = SharePoint(
    site_url="https://contoso.sharepoint.com/sites/Documents",
    client_id="your-app-id",
    client_secret="your-secret"
)

# Watch a folder for new documents
hub.watch(
    source_folder="/Documents/English",
    target_folder="/Documents/Finnish",
    target_language="fi",
    file_types=["docx", "pdf", "pptx"]
)

Google Drive

from pauhu.hubs import GoogleDrive

hub = GoogleDrive(
    credentials_file="credentials.json"
)

# Sync entire folder
hub.sync(
    source_folder="1234567890abcdef",  # Folder ID
    target_folder="0987654321fedcba",
    target_language="fi"
)

AWS S3

from pauhu.hubs import S3

hub = S3(
    bucket="my-documents",
    region="eu-north-1",
    access_key="AKIA...",
    secret_key="..."
)

# Event-driven translation
hub.configure_trigger(
    prefix="uploads/english/",
    target_prefix="uploads/finnish/",
    target_language="fi"
)

Watch Folder Pattern

sequenceDiagram
    participant User
    participant Source as Source Folder
    participant Pauhu
    participant Target as Target Folder

    User->>Source: Upload document.pdf
    Source->>Pauhu: New file event
    Pauhu->>Pauhu: Translate document
    Pauhu->>Target: Save translated document (e.g. document_fi.pdf)
    Target->>User: Notification (optional)

Folder Structure Mapping

Mirror Structure

# Source and target folders mirror each other
hub.watch(
    source_folder="/English",
    target_folder="/Finnish",
    structure="mirror"  # Default
)

# Result:
# /English/Reports/Q1.pdf -> /Finnish/Reports/Q1.pdf
# /English/Legal/Contract.docx -> /Finnish/Legal/Contract.docx

Flat Structure

# All translated files in one folder
hub.watch(
    source_folder="/English",
    target_folder="/Finnish",
    structure="flat"
)

# Result:
# /English/Reports/Q1.pdf -> /Finnish/Reports_Q1_fi.pdf
# /English/Legal/Contract.docx -> /Finnish/Legal_Contract_fi.docx

Language Suffix

# Keep translations in the same folder as the source, distinguished by a language suffix
hub.watch(
    source_folder="/Documents",
    target_folder="/Documents",
    naming="suffix"
)

# Result:
# /Documents/Report.pdf -> /Documents/Report_fi.pdf

Multi-Language Sync

# Translate to multiple languages simultaneously
hub.watch(
    source_folder="/English",
    targets=[
        {"folder": "/Finnish", "language": "fi"},
        {"folder": "/Swedish", "language": "sv"},
        {"folder": "/German", "language": "de"},
        {"folder": "/French", "language": "fr"}
    ]
)

Filtering

File Type Filter

hub.watch(
    source_folder="/Documents",
    target_folder="/Documents_FI",
    target_language="fi",
    include=["docx", "pdf", "xlsx"],
    exclude=["tmp", "bak"]
)

Name Pattern Filter

hub.watch(
    source_folder="/Documents",
    target_folder="/Documents_FI",
    target_language="fi",
    pattern="*.docx",
    exclude_pattern="*_draft*"
)

Size Filter

hub.watch(
    source_folder="/Documents",
    target_folder="/Documents_FI",
    target_language="fi",
    max_size_mb=50  # Skip files > 50 MB
)

Conflict Resolution

hub.watch(
    source_folder="/English",
    target_folder="/Finnish",
    target_language="fi",
    on_conflict="version"  # Options: skip, overwrite, version
)

# "version" behavior (when the target file already exists):
# Report.pdf exists -> new translation saved as Report_v2.pdf
# Report_v2.pdf exists -> new translation saved as Report_v3.pdf

Webhooks

Completion Webhook

hub.watch(
    source_folder="/English",
    target_folder="/Finnish",
    target_language="fi",
    webhook={
        "url": "https://api.yourapp.com/pauhu/callback",
        "events": ["completed", "failed"],
        "headers": {"Authorization": "Bearer your-token"}
    }
)

Webhook Payload

{
  "event": "completed",
  "timestamp": "2025-01-15T10:30:00Z",
  "source": {
    "path": "/English/Report.pdf",
    "size": 1024000,
    "modified": "2025-01-15T10:00:00Z"
  },
  "target": {
    "path": "/Finnish/Report.pdf",
    "size": 1048000,
    "created": "2025-01-15T10:30:00Z"
  },
  "translation": {
    "duration_ms": 5000,
    "word_count": 5000,
    "quality_score": 0.98
  }
}

Monitoring

Dashboard

# Get sync status
status = hub.status()

print(status.files_pending)     # 12
print(status.files_processing)  # 3
print(status.files_completed)   # 1547
print(status.files_failed)      # 2
print(status.last_sync)         # 2025-01-15T10:30:00Z

Logs

# Get recent activity
for event in hub.events(limit=100):
    print(f"{event.timestamp}: {event.type} - {event.path}")

Error Handling

hub.watch(
    source_folder="/English",
    target_folder="/Finnish",
    target_language="fi",
    retry={
        "max_attempts": 3,
        "backoff": "exponential",
        "initial_delay_seconds": 60
    },
    on_failure="quarantine",  # Move to error folder
    error_folder="/English/_errors"
)

Getting Started

from pauhu import Pauhu
from pauhu.hubs import SharePoint

client = Pauhu()

# Connect SharePoint
hub = SharePoint(
    site_url="https://contoso.sharepoint.com/sites/Docs",
    client_id="your-app-id",
    client_secret="your-secret"
)

# Start watching
hub.watch(
    source_folder="/Documents/English",
    target_folder="/Documents/Finnish",
    target_language="fi"
)

print("Watching for new documents...")