Translation Memory¶
Your translations, your memory. Pauhu learns from every correction you make, keeps terminology consistent across all your projects, and supports industry-standard TMX import/export.
How It Works¶
graph LR
A[New Translation] --> B{Check TM}
B -->|Match Found| C[Use TM Translation]
B -->|No Match| D[AI Translation]
D --> E[User Review]
E -->|Correction| F[Save to TM]
E -->|Approve| G[Save to TM]
F --> H[Translation Memory]
G --> H
Match Types¶
| Match Type | Description | Usage |
|---|---|---|
| 100% Match | Exact same segment | Auto-apply |
| Context Match | Same segment + context | Auto-apply |
| Fuzzy Match | Similar segment (70-99%) | Suggest |
| Terminology | Known terms | Enforce |
| No Match | New segment | AI translate |
Using Translation Memory¶
Enable TM¶
from pauhu import Pauhu
client = Pauhu()
# Enable TM for translations
result = client.translate(
text="The contract shall be governed by Finnish law.",
target="fi",
use_memory=True
)
print(result.tm_match)
# {
# "type": "fuzzy",
# "score": 0.92,
# "source": "The agreement shall be governed by Finnish law.",
# "target": "Sopimukseen sovelletaan Suomen lakia."
# }
print(result.translation)
# "Sopimukseen sovelletaan Suomen lakia."
Save Corrections¶
# User corrects translation
client.tm.save(
source="The contract shall be governed by Finnish law.",
target="Sopimukseen sovelletaan Suomen lakia.",
source_lang="en",
target_lang="fi",
project="legal_contracts"
)
Translation Memory Management¶
Create TM¶
from pauhu.tm import TranslationMemory
# Create new TM
tm = TranslationMemory.create(
name="Legal Contracts EN-FI",
source_lang="en",
target_lang="fi",
domain="12 Law"
)
print(tm.id) # "tm_abc123"
Import TMX¶
# Import existing TMX file
tm.import_tmx(
file_path="existing_memory.tmx",
on_duplicate="merge" # Options: skip, merge, replace
)
print(f"Imported {tm.segment_count} segments")
Export TMX¶
Project-Based Memory¶
# Use project-specific TM
result = client.translate(
text="Force majeure clause",
target="fi",
use_memory=True,
project="contract_2025"
)
# TM search hierarchy:
# 1. Project TM (contract_2025)
# 2. Domain TM (12 Law)
# 3. Global TM
Fuzzy Match Settings¶
# Configure fuzzy match thresholds
result = client.translate(
text="The seller agrees to deliver the goods",
target="fi",
use_memory=True,
tm_settings={
"min_match": 0.70, # Minimum fuzzy match score
"auto_apply": 0.95, # Auto-apply above this score
"suggest": 0.70, # Show suggestions above this
"penalize_length": True # Lower score for length diff
}
)
Terminology Database¶
Add Terms¶
from pauhu.tm import Terminology
# Create terminology database
terms = Terminology.create(
name="Financial Terms EN-FI",
source_lang="en",
target_lang="fi"
)
# Add terms
terms.add(
source="accounts receivable",
target="myyntisaamiset",
domain="24 Finance",
note="Accounting term"
)
terms.add(
source="balance sheet",
target="tase",
domain="24 Finance"
)
Enforce Terminology¶
# Translation uses enforced terminology
result = client.translate(
text="The accounts receivable appear on the balance sheet.",
target="fi",
use_memory=True,
enforce_terminology=True
)
print(result.translation)
# "Myyntisaamiset näkyvät taseessa."
print(result.terms_applied)
# ["accounts receivable", "balance sheet"]
Bulk Operations¶
Batch Save¶
# Save multiple segments at once
segments = [
{
"source": "Terms and conditions",
"target": "Ehdot ja edellytykset"
},
{
"source": "Privacy policy",
"target": "Tietosuojakäytäntö"
},
{
"source": "Cookie policy",
"target": "Evästekäytäntö"
}
]
tm.save_batch(
segments=segments,
source_lang="en",
target_lang="fi"
)
Pre-Translate¶
# Pre-translate document using TM
result = client.pretranslate_document(
file_path="new_contract.docx",
target="fi",
tm_id="tm_abc123"
)
print(f"100% matches: {result.exact_matches}")
print(f"Fuzzy matches: {result.fuzzy_matches}")
print(f"No match: {result.no_matches}")
Analytics¶
TM Statistics¶
stats = tm.statistics()
print(f"Total segments: {stats.segment_count}")
print(f"Languages: {stats.source_lang} -> {stats.target_lang}")
print(f"Last updated: {stats.last_updated}")
print(f"Average segment length: {stats.avg_segment_length}")
Match Rate¶
# Get match rate for a document
rate = client.tm.analyze_document(
file_path="document.docx",
target="fi",
tm_id="tm_abc123"
)
print(f"100% matches: {rate.exact_match_percent}%")
print(f"95-99% matches: {rate.high_fuzzy_percent}%")
print(f"70-94% matches: {rate.low_fuzzy_percent}%")
print(f"No match: {rate.no_match_percent}%")
CAT Tool Integration¶
memoQ¶
from pauhu.integrations import MemoQ
# Connect to memoQ server
memoq = MemoQ(
server_url="https://memoq.company.com",
api_key="your-api-key"
)
# Sync TM
memoq.sync_tm(
memoq_tm_guid="...",
pauhu_tm_id="tm_abc123"
)
SDL Trados¶
from pauhu.integrations import Trados
# Export compatible format
tm.export(
output_path="memory.sdltm",
format="trados"
)
Getting Started¶
from pauhu import Pauhu
client = Pauhu()
# Translate with TM
result = client.translate(
text="Terms and conditions",
target="fi",
use_memory=True
)
print(result.translation)
# "Ehdot ja edellytykset"
# Save correction
client.tm.save(
source="Terms and conditions",
target="Käyttöehdot", # Corrected translation
source_lang="en",
target_lang="fi"
)
# Next time, corrected version is used
result2 = client.translate(
text="Terms and conditions",
target="fi",
use_memory=True
)
print(result2.translation)
# "Käyttöehdot"