Beyond text. Pauhu processes images, audio, and video with the same precision as text. Extract text from images, transcribe speech, synthesize translated audio, and subtitle videos.
from pauhu import Pauhu

client = Pauhu()

# Translate text in image
result = client.translate_image(image_path="sign.jpg", target="fi")
print(result.extracted_text)  # "Exit"
print(result.translation)  # "Uloskäynti"
print(result.confidence)  # 0.98
# Create translated image with text overlay
result = client.translate_image(
    image_path="infographic.png",
    target="fi",
    output="overlay",  # Replace text in image
)
result.save("infographic_fi.png")
# Process folder of images
results = client.translate_images(
    folder_path="/images/english",
    target="fi",
    output_folder="/images/finnish",
    formats=["png", "jpg", "tiff"],
)
from pauhu import Pauhu
from pauhu.realtime import AudioStream

client = Pauhu()


async def translate_live():
    """Translate microphone audio from English to Finnish as it arrives.

    Iterates the segments yielded by ``AudioStream.transcribe_and_translate``
    and, for each one, plays the translated audio and prints the source and
    target text.
    """
    audio = AudioStream(device="microphone")
    async for segment in audio.transcribe_and_translate(
        source="en",
        target="fi",
        output="audio",
    ):
        # Play translated audio in real-time
        segment.play()
        # Or access text
        print(f"EN: {segment.source_text}")
        print(f"FI: {segment.target_text}")
# Generate Finnish speech from English text
result = client.text_to_speech(
    text="Welcome to Finland",
    source="en",
    target="fi",
    voice="female_1",  # Finnish voice
)
result.save("welcome_fi.mp3")
# Full audio dubbing
result = client.translate_video(
    video_path="training.mp4",
    target="fi",
    output="dubbed",
    preserve_music=True,  # Keep background music
    voice_cloning=True,  # Match original speaker
)
result.save("training_fi.mp4")
# Use specific voice
result = client.translate_audio(
    audio_path="speech.mp3",
    target="fi",
    voice="fi_female_1",
    speed=1.0,  # Normal speed
    pitch=1.0,  # Normal pitch
)
from pauhu import Pauhu

client = Pauhu()

# High-quality settings for production
result = client.translate_audio(
    audio_path="audiobook.mp3",
    target="fi",
    quality={
        "transcription": "whisper-large",
        "translation": "quality",
        "tts": "neural-hd",
        "sample_rate": 48000,
        "bitrate": 320,
    },
)

# Fast settings for real-time
result = client.translate_audio(
    audio_path="call.mp3",
    target="fi",
    quality={
        "transcription": "whisper-small",
        "translation": "fast",
        "tts": "neural",
        "sample_rate": 16000,
        "bitrate": 128,
    },
)