Translation Memory¶

Your translations, your memory. Pauhu learns from every correction you make, keeps terminology consistent across all your projects, and supports industry-standard TMX import/export.

How It Works¶

graph LR
    A[New Translation] --> B{Check TM}
    B -->|Match Found| C[Use TM Translation]
    B -->|No Match| D[AI Translation]
    D --> E[User Review]
    E -->|Correction| F[Save to TM]
    E -->|Approve| G[Save to TM]
    F --> H[Translation Memory]
    G --> H

Match Types¶

Match Type	Description	Usage
100% Match	Exact same segment	Auto-apply
Context Match	Same segment + context	Auto-apply
Fuzzy Match	Similar segment (70-99%)	Suggest (auto-applied above the auto_apply threshold; see Fuzzy Match Settings)
Terminology	Known terms	Enforce
No Match	New segment	AI translate

Using Translation Memory¶

Enable TM¶

from pauhu import Pauhu

client = Pauhu()

# Enable TM for translations
result = client.translate(
    text="The contract shall be governed by Finnish law.",
    target="fi",
    use_memory=True
)

print(result.tm_match)
# {
#   "type": "fuzzy",
#   "score": 0.92,
#   "source": "The agreement shall be governed by Finnish law.",
#   "target": "Sopimukseen sovelletaan Suomen lakia."
# }

print(result.translation)
# "Sopimukseen sovelletaan Suomen lakia."

Save Corrections¶

# Save the user's corrected translation to the TM
client.tm.save(
    source="The contract shall be governed by Finnish law.",
    target="Sopimukseen sovelletaan Suomen lakia.",
    source_lang="en",
    target_lang="fi",
    project="legal_contracts"
)

Translation Memory Management¶

Create TM¶

from pauhu.tm import TranslationMemory

# Create new TM
tm = TranslationMemory.create(
    name="Legal Contracts EN-FI",
    source_lang="en",
    target_lang="fi",
    domain="12 Law"
)

print(tm.id)  # "tm_abc123"

Import TMX¶

# Import existing TMX file
tm.import_tmx(
    file_path="existing_memory.tmx",
    on_duplicate="merge"  # Options: skip, merge, replace
)

print(f"Imported {tm.segment_count} segments")

Export TMX¶

# Export to TMX
tm.export_tmx(
    output_path="pauhu_memory.tmx",
    format="tmx_1.4b"
)

Project-Based Memory¶

# Use project-specific TM
result = client.translate(
    text="Force majeure clause",
    target="fi",
    use_memory=True,
    project="contract_2025"
)

# TM search hierarchy:
# 1. Project TM (contract_2025)
# 2. Domain TM (12 Law)
# 3. Global TM

Fuzzy Match Settings¶

# Configure fuzzy match thresholds
result = client.translate(
    text="The seller agrees to deliver the goods",
    target="fi",
    use_memory=True,
    tm_settings={
        "min_match": 0.70,      # Minimum fuzzy match score
        "auto_apply": 0.95,     # Auto-apply above this score
        "suggest": 0.70,        # Show suggestions above this
        "penalize_length": True  # Lower score for length diff
    }
)

Terminology Database¶

Add Terms¶

from pauhu.tm import Terminology

# Create terminology database
terms = Terminology.create(
    name="Financial Terms EN-FI",
    source_lang="en",
    target_lang="fi"
)

# Add terms
terms.add(
    source="accounts receivable",
    target="myyntisaamiset",
    domain="24 Finance",
    note="Accounting term"
)

terms.add(
    source="balance sheet",
    target="tase",
    domain="24 Finance"
)

Enforce Terminology¶

# Translation uses enforced terminology
result = client.translate(
    text="The accounts receivable appear on the balance sheet.",
    target="fi",
    use_memory=True,
    enforce_terminology=True
)

print(result.translation)
# "Myyntisaamiset näkyvät taseessa."

print(result.terms_applied)
# ["accounts receivable", "balance sheet"]

Bulk Operations¶

Batch Save¶

# Save multiple segments at once
segments = [
    {
        "source": "Terms and conditions",
        "target": "Ehdot ja edellytykset"
    },
    {
        "source": "Privacy policy",
        "target": "Tietosuojakäytäntö"
    },
    {
        "source": "Cookie policy",
        "target": "Evästekäytäntö"
    }
]

tm.save_batch(
    segments=segments,
    source_lang="en",
    target_lang="fi"
)

Pre-Translate¶

# Pre-translate document using TM
result = client.pretranslate_document(
    file_path="new_contract.docx",
    target="fi",
    tm_id="tm_abc123"
)

print(f"100% matches: {result.exact_matches}")
print(f"Fuzzy matches: {result.fuzzy_matches}")
print(f"No match: {result.no_matches}")

Analytics¶

TM Statistics¶

stats = tm.statistics()

print(f"Total segments: {stats.segment_count}")
print(f"Languages: {stats.source_lang} -> {stats.target_lang}")
print(f"Last updated: {stats.last_updated}")
print(f"Average segment length: {stats.avg_segment_length}")

Match Rate¶

# Get match rate for a document
rate = client.tm.analyze_document(
    file_path="document.docx",
    target="fi",
    tm_id="tm_abc123"
)

print(f"100% matches: {rate.exact_match_percent}%")
print(f"95-99% matches: {rate.high_fuzzy_percent}%")
print(f"70-94% matches: {rate.low_fuzzy_percent}%")
print(f"No match: {rate.no_match_percent}%")

CAT Tool Integration¶

memoQ¶

from pauhu.integrations import MemoQ

# Connect to memoQ server
memoq = MemoQ(
    server_url="https://memoq.company.com",
    api_key="your-api-key"
)

# Sync TM
memoq.sync_tm(
    memoq_tm_guid="...",
    pauhu_tm_id="tm_abc123"
)

SDL Trados¶

from pauhu.integrations import Trados

# Export the TM in a Trados-compatible format (.sdltm)
tm.export(
    output_path="memory.sdltm",
    format="trados"
)

Getting Started¶

from pauhu import Pauhu

client = Pauhu()

# Translate with TM
result = client.translate(
    text="Terms and conditions",
    target="fi",
    use_memory=True
)

print(result.translation)
# "Ehdot ja edellytykset"

# Save correction
client.tm.save(
    source="Terms and conditions",
    target="Käyttöehdot",  # Corrected translation
    source_lang="en",
    target_lang="fi"
)

# Next time, the corrected version is used
result2 = client.translate(
    text="Terms and conditions",
    target="fi",
    use_memory=True
)

print(result2.translation)
# "Käyttöehdot"