File Hubs¶
Connect once, translate forever. Pauhu integrates with your existing file storage. Drop a document in English, find the Finnish version minutes later. Automatic, seamless, continuous.
Supported Integrations¶
| Platform | Read | Write | Watch | Webhooks |
|---|---|---|---|---|
| SharePoint | ||||
| OneDrive | ||||
| Google Drive | ||||
| Dropbox | ||||
| Box | ||||
| AWS S3 | ||||
| Azure Blob | ||||
| Google Cloud Storage | ||||
| SFTP | ||||
| SMB/CIFS | ||||
Quick Setup¶
SharePoint Online¶
from pauhu import Pauhu
from pauhu.hubs import SharePoint
client = Pauhu()
# Connect to SharePoint
hub = SharePoint(
site_url="https://contoso.sharepoint.com/sites/Documents",
client_id="your-app-id",
client_secret="your-secret"
)
# Watch a folder for new documents
hub.watch(
source_folder="/Documents/English",
target_folder="/Documents/Finnish",
target_language="fi",
file_types=["docx", "pdf", "pptx"]
)
Google Drive¶
from pauhu.hubs import GoogleDrive
hub = GoogleDrive(
credentials_file="credentials.json"
)
# Sync entire folder
hub.sync(
source_folder="1234567890abcdef", # Folder ID
target_folder="0987654321fedcba",
target_language="fi"
)
AWS S3¶
from pauhu.hubs import S3
hub = S3(
bucket="my-documents",
region="eu-north-1",
access_key="AKIA...",
secret_key="..."
)
# Event-driven translation
hub.configure_trigger(
prefix="uploads/english/",
target_prefix="uploads/finnish/",
target_language="fi"
)
Watch Folder Pattern¶
sequenceDiagram
participant User
participant Source as Source Folder
participant Pauhu
participant Target as Target Folder
User->>Source: Upload document.pdf
Source->>Pauhu: New file event
Pauhu->>Pauhu: Translate document
Pauhu->>Target: Save document_fi.pdf
Target->>User: Notification (optional)
Folder Structure Mapping¶
Mirror Structure¶
# Source and target folders mirror each other
hub.watch(
source_folder="/English",
target_folder="/Finnish",
structure="mirror" # Default
)
# Result:
# /English/Reports/Q1.pdf -> /Finnish/Reports/Q1.pdf
# /English/Legal/Contract.docx -> /Finnish/Legal/Contract.docx
Flat Structure¶
# All translated files in one folder
hub.watch(
source_folder="/English",
target_folder="/Finnish",
structure="flat"
)
# Result:
# /English/Reports/Q1.pdf -> /Finnish/Reports_Q1_fi.pdf
# /English/Legal/Contract.docx -> /Finnish/Legal_Contract_fi.docx
Language Suffix¶
# Keep in same folder with language suffix
hub.watch(
source_folder="/Documents",
target_folder="/Documents",
naming="suffix"
)
# Result:
# /Documents/Report.pdf -> /Documents/Report_fi.pdf
Multi-Language Sync¶
# Translate to multiple languages simultaneously
hub.watch(
source_folder="/English",
targets=[
{"folder": "/Finnish", "language": "fi"},
{"folder": "/Swedish", "language": "sv"},
{"folder": "/German", "language": "de"},
{"folder": "/French", "language": "fr"}
]
)
Filtering¶
File Type Filter¶
hub.watch(
source_folder="/Documents",
target_folder="/Documents_FI",
target_language="fi",
include=["docx", "pdf", "xlsx"],
exclude=["tmp", "bak"]
)
Name Pattern Filter¶
hub.watch(
source_folder="/Documents",
target_folder="/Documents_FI",
target_language="fi",
pattern="*.docx",
exclude_pattern="*_draft*"
)
Size Filter¶
hub.watch(
source_folder="/Documents",
target_folder="/Documents_FI",
target_language="fi",
max_size_mb=50 # Skip files > 50 MB
)
Conflict Resolution¶
hub.watch(
source_folder="/English",
target_folder="/Finnish",
target_language="fi",
on_conflict="version" # Options: skip, overwrite, version
)
# "version" behavior:
# Report.pdf exists -> Report_v2.pdf
# Report_v2.pdf exists -> Report_v3.pdf
Webhooks¶
Completion Webhook¶
hub.watch(
source_folder="/English",
target_folder="/Finnish",
target_language="fi",
webhook={
"url": "https://api.yourapp.com/pauhu/callback",
"events": ["completed", "failed"],
"headers": {"Authorization": "Bearer your-token"}
}
)
Webhook Payload¶
{
"event": "completed",
"timestamp": "2025-01-15T10:30:00Z",
"source": {
"path": "/English/Report.pdf",
"size": 1024000,
"modified": "2025-01-15T10:00:00Z"
},
"target": {
"path": "/Finnish/Report.pdf",
"size": 1048000,
"created": "2025-01-15T10:30:00Z"
},
"translation": {
"duration_ms": 5000,
"word_count": 5000,
"quality_score": 0.98
}
}
Monitoring¶
Dashboard¶
# Get sync status
status = hub.status()
print(status.files_pending) # 12
print(status.files_processing) # 3
print(status.files_completed) # 1547
print(status.files_failed) # 2
print(status.last_sync) # 2025-01-15T10:30:00Z
Logs¶
# Get recent activity
for event in hub.events(limit=100):
print(f"{event.timestamp}: {event.type} - {event.path}")
Error Handling¶
hub.watch(
source_folder="/English",
target_folder="/Finnish",
target_language="fi",
retry={
"max_attempts": 3,
"backoff": "exponential",
"initial_delay_seconds": 60
},
on_failure="quarantine", # Move to error folder
error_folder="/English/_errors"
)
Getting Started¶
from pauhu import Pauhu
from pauhu.hubs import SharePoint
client = Pauhu()
# Connect SharePoint
hub = SharePoint(
site_url="https://contoso.sharepoint.com/sites/Docs",
client_id="your-app-id",
client_secret="your-secret"
)
# Start watching
hub.watch(
source_folder="/Documents/English",
target_folder="/Documents/Finnish",
target_language="fi"
)
print("Watching for new documents...")