Skip to content

Quality Assurance

Trust, but verify. Pauhu includes comprehensive quality assurance with automated checks, terminology validation, consistency scoring, and human-in-the-loop workflows.


Quality Metrics

| Metric | Description | Target |
|---|---|---|
| BLEU Score | N-gram similarity | >0.85 |
| COMET Score | Neural quality | >0.90 |
| Consistency | Term consistency | 100% |
| Completeness | Nothing missing | 100% |
| Fluency | Natural language | >0.95 |

Automated QA Checks

from pauhu import Pauhu

client = Pauhu()

# Translate with QA
result = client.translate(
    text="The agreement is valid until December 31, 2025.",
    target="fi",
    quality_check=True
)

print(result.quality_score)  # 0.97

for check in result.quality_checks:
    print(f"{check.name}: {check.status}")

# Output:
# completeness: passed
# terminology: passed
# numbers: passed
# dates: passed
# consistency: passed
# fluency: passed

QA Check Types

Completeness Check

# Verify nothing is missing or added
result = client.translate(
    text="Contact: john@example.com, Phone: +358 40 123 4567",
    target="fi",
    quality_checks=["completeness"]
)

print(result.checks.completeness)
# {
#   "status": "passed",
#   "source_elements": ["email", "phone"],
#   "target_elements": ["email", "phone"],
#   "missing": [],
#   "added": []
# }

Number Check

# Verify numbers are preserved correctly
result = client.translate(
    text="The budget is EUR 1,234,567.89",
    target="fi",
    quality_checks=["numbers"]
)

print(result.checks.numbers)
# {
#   "status": "passed",
#   "source_numbers": ["1,234,567.89"],
#   "target_numbers": ["1 234 567,89"],  # Finnish format
#   "format_correct": True
# }

Terminology Check

# Verify approved terms are used
result = client.translate(
    text="The data controller processes personal data",
    target="fi",
    quality_checks=["terminology"],
    terminology_id="gdpr_terms"
)

print(result.checks.terminology)
# {
#   "status": "passed",
#   "terms_checked": [
#     {"source": "data controller", "target": "rekisterinpitäjä", "correct": True},
#     {"source": "personal data", "target": "henkilötiedot", "correct": True}
#   ]
# }

Consistency Check

# Check that the same source term is translated the same way throughout the document
result = client.translate_document(
    file_path="contract.pdf",
    target="fi",
    quality_checks=["consistency"]
)

print(result.checks.consistency)
# {
#   "status": "warning",
#   "inconsistencies": [
#     {
#       "source": "agreement",
#       "translations": ["sopimus", "sopimusasiakirja"],
#       "occurrences": [3, 1]
#     }
#   ]
# }

Quality Thresholds

# Set minimum quality thresholds
result = client.translate(
    text="Critical legal text",
    target="fi",
    quality_check=True,
    quality_threshold={
        "overall": 0.95,
        "completeness": 1.0,
        "terminology": 1.0,
        "fluency": 0.90
    }
)

if not result.meets_threshold:
    print("Quality below threshold!")
    for issue in result.threshold_failures:
        print(f"- {issue.check}: {issue.score} < {issue.required}")

Human-in-the-Loop

Review Workflow

from pauhu.workflows import ReviewWorkflow

# Create review workflow
workflow = ReviewWorkflow.create(
    name="Legal Review",
    rules=[
        {"if": "quality_score < 0.95", "then": "require_review"},
        {"if": "domain == '12 Law'", "then": "require_legal_review"},
        {"if": "word_count > 1000", "then": "require_senior_review"}
    ]
)

# Translate with workflow
result = client.translate(
    text="Binding arbitration clause",
    target="fi",
    workflow=workflow.id
)

print(result.workflow_status)  # "pending_review"
print(result.assigned_reviewer)  # "legal_team"

Review Interface

# Reviewer approves/corrects
from pauhu.workflows import Review

review = Review.get(result.review_id)

# Approve as-is
review.approve()

# Or, instead of approving as-is, correct the translation and approve it
review.correct(
    translation="Sitova välityslauseke",
    note="Used formal legal term"
)

Quality Reports

Document Report

# Generate quality report for document
report = client.quality_report(
    file_path="translated_document.pdf",
    source_lang="en",
    target_lang="fi"
)

report.save("quality_report.pdf")

print(report.summary)
# {
#   "overall_score": 0.94,
#   "segments_reviewed": 245,
#   "issues_found": 12,
#   "critical_issues": 0,
#   "warnings": 12
# }

Trend Analysis

# Quality trends over time
trends = client.quality_trends(
    project="legal_2025",
    period="last_30_days"
)

for day in trends.daily:
    print(f"{day.date}: {day.avg_quality:.2f}")

Quality Improvement

Feedback Loop

# Submit feedback for quality improvement
client.feedback(
    translation_id=result.id,
    rating=4,  # 1-5
    corrections=[
        {
            "original": "sopimusasiakirja",
            "corrected": "sopimus",
            "reason": "simpler term preferred"
        }
    ]
)

Model Improvement

# Corrections submitted as feedback improve future translations.
# Once sufficient feedback has been collected, check the improvement:
stats = client.quality_improvement_stats(
    term="agreement",
    target_lang="fi"
)

print(f"Before corrections: {stats.before_accuracy:.2%}")
print(f"After corrections: {stats.after_accuracy:.2%}")

ISO 17100 Compliance

# ISO 17100 certified workflow
result = client.translate(
    text="Certified translation required",
    target="fi",
    workflow="iso_17100",
    quality_checks=["all"]
)

# Workflow includes:
# 1. Initial AI translation
# 2. Human translation review
# 3. Human revision
# 4. Quality assurance check
# 5. Final approval

print(result.certification)
# {
#   "standard": "ISO 17100:2015",
#   "translator": "Certified Translator #12345",
#   "reviewer": "Certified Reviewer #67890",
#   "date": "2025-01-15"
# }

Getting Started

from pauhu import Pauhu

client = Pauhu()

# Translate with quality checks
result = client.translate(
    text="The parties agree to the terms.",
    target="fi",
    quality_check=True
)

print(f"Translation: {result.translation}")
print(f"Quality Score: {result.quality_score:.2f}")

for check in result.quality_checks:
    status = "✓" if check.passed else "✗"
    print(f"  {status} {check.name}")

# Output:
# Translation: Osapuolet hyväksyvät ehdot.
# Quality Score: 0.98
#   ✓ completeness
#   ✓ terminology
#   ✓ fluency
#   ✓ consistency