Source code for platypush.plugins.tts.piper

import os
import pathlib
import tempfile
import wave
from collections import defaultdict
from threading import RLock
from typing import Optional, Dict

from platypush.config import Config
from platypush.plugins import action
from platypush.plugins.tts import TtsPlugin


class TtsPiperPlugin(TtsPlugin):
    r"""
    Text-to-speech plugin that uses `Piper
    <https://github.com/OHF-Voice/piper1-gpl>`_, a fast and local neural
    text-to-speech engine.

    Install with:

    .. code-block:: bash

        $ pip install piper-tts

    You will also need to download at least one voice model. You can do so
    via the :meth:`.download_voice` action.

    Voice models are stored in ``<WORKDIR>/piper_tts`` unless a different
    ``models_dir`` is configured.

    The full list of supported voice models is `here
    <https://rhasspy.github.io/piper-samples/>`_.
    """

    def __init__(
        self,
        *,
        model: Optional[str] = None,
        models_dir: Optional[str] = None,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w_scale: Optional[float] = None,
        use_cuda: bool = False,
        start_padding: float = 1,
        end_padding: float = 1,
        **kwargs,
    ):
        """
        :param model: Path to the Piper ``.onnx`` default voice model file, or
            model name (e.g. ``en_US-hfc_female-medium``) relative to
            ``models_dir``. If not specified, it must be specified when
            :meth:`.say` is called, or a model should be downloaded via
            :meth:`.download_voice`.
        :param models_dir: Directory where Piper voice models are stored.
            Default: ``<WORKDIR>/piper_tts``.
        :param speaker_id: Default speaker ID for multi-speaker models
            (default: None).
        :param length_scale: Default speaking speed scale. Higher values make
            speech slower (default: voice default, typically 1.0).
        :param noise_scale: Default audio variation / expressiveness scale
            (default: voice default).
        :param noise_w_scale: Default phoneme width variation scale (default:
            voice default).
        :param use_cuda: Whether to use CUDA for GPU acceleration. Requires
            ``onnxruntime-gpu`` to be installed (default: False).
        :param start_padding: Silence, in seconds, to prepend before playing
            the audio. This gives the audio backend (e.g. PulseAudio/PipeWire)
            time to initialize the output path, avoiding the first fraction of
            generated speech being silently dropped (default: 1).
        :param end_padding: Silence, in seconds, to append before closing the
            playback stream. This avoids clipping the tail of short generated
            speech on some audio backends (default: 1).
        :param kwargs: Extra arguments to be passed to the
            :class:`platypush.plugins.tts.TtsPlugin` constructor.
        """
        # Imported lazily so the module can be loaded without piper installed;
        # the name is only needed for the annotation on ``_voices`` below.
        from piper import PiperVoice

        super().__init__(**kwargs)
        self._models_dir = (
            os.path.expanduser(models_dir)
            if models_dir
            else os.path.join(Config.get_workdir(), "piper_tts")
        )
        self._model = model
        self._speaker_id = speaker_id
        self._length_scale = length_scale
        self._noise_scale = noise_scale
        self._noise_w_scale = noise_w_scale
        self._use_cuda = use_cuda

        # ``player_args`` is not assigned in this class - presumably it is set
        # by the parent constructor. ``setdefault`` keeps any padding values
        # that are already present in it.
        self.player_args.setdefault('start_padding', start_padding)
        self.player_args.setdefault('end_padding', end_padding)

        # Loaded voices, keyed by resolved model path, plus one lock per path
        # that is held while a voice is looked up or loaded.
        self._voices: Dict[str, Optional[PiperVoice]] = defaultdict(lambda: None)
        self._voices_locks = defaultdict(RLock)

    @staticmethod
    def _with_onnx_suffix(path: str) -> str:
        """Return ``path`` with a ``.onnx`` extension appended if missing."""
        return path if path.endswith('.onnx') else path + '.onnx'

    def _get_voice(self, model: Optional[str] = None):
        """
        Return the voice for ``model``, loading and caching it on first use.

        :param model: Model path or model name. Falls back to the default
            model configured on the plugin when empty.
        :raises ValueError: If neither ``model`` nor a default model is set.
        """
        from piper import PiperVoice

        model = model or self._model
        if not model:
            raise ValueError('model must be specified')

        # First treat the value as a filesystem path; if nothing exists there,
        # treat it as a model name relative to the models directory.
        model_path = self._with_onnx_suffix(os.path.expanduser(model))
        if not os.path.exists(model_path):
            model_path = self._with_onnx_suffix(
                os.path.join(self._models_dir, model)
            )

        with self._voices_locks[model_path]:
            voice = self._voices[model_path]
            if not voice:
                voice = self._voices[model_path] = PiperVoice.load(
                    model_path, use_cuda=self._use_cuda
                )

            return voice

    def _synthesis_config(
        self,
        *,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w_scale: Optional[float] = None,
    ):
        """
        Build the ``SynthesisConfig`` for a request: per-call values take
        precedence over the plugin defaults, and options that are ``None`` in
        both places are left out so that the voice's own defaults apply.
        """
        from piper.config import SynthesisConfig

        candidates = {
            'speaker_id': (speaker_id, self._speaker_id),
            'length_scale': (length_scale, self._length_scale),
            'noise_scale': (noise_scale, self._noise_scale),
            'noise_w_scale': (noise_w_scale, self._noise_w_scale),
        }

        syn_kwargs = {}
        for name, (override, default) in candidates.items():
            value = override if override is not None else default
            if value is not None:
                syn_kwargs[name] = value

        return SynthesisConfig(**syn_kwargs)

    @staticmethod
    def _synthesize_to_wav(voice, text: str, path: str, syn_config) -> None:
        """Synthesize ``text`` with ``voice`` into the WAV file at ``path``."""
        with wave.open(path, 'wb') as wav_file:
            voice.synthesize_wav(text, wav_file, syn_config=syn_config)

    @action
    def say(
        self,
        text: str,
        *_,
        model: Optional[str] = None,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w_scale: Optional[float] = None,
        output_file: Optional[str] = None,
        **player_args,
    ):
        """
        Say some text.

        :param text: Text to say.
        :param model: Override the default voice model.
        :param speaker_id: Speaker ID override for multi-speaker models.
        :param length_scale: Speaking speed override.
        :param noise_scale: Audio variation override.
        :param noise_w_scale: Phoneme width variation override.
        :param output_file: If set, save the audio to the specified file
            instead of playing it.
        :param player_args: Extends the additional arguments to be passed to
            :meth:`platypush.plugins.sound.SoundPlugin.play` (like volume,
            duration, channels etc.).
        """
        # ``language`` is dropped so that it is never forwarded to the player;
        # the voice is selected through ``model`` instead.
        player_args.pop('language', None)
        voice = self._get_voice(model)
        syn_config = self._synthesis_config(
            speaker_id=speaker_id,
            length_scale=length_scale,
            noise_scale=noise_scale,
            noise_w_scale=noise_w_scale,
        )

        if output_file:
            self._synthesize_to_wav(
                voice, text, os.path.expanduser(output_file), syn_config
            )
            return

        # Reserve a temporary file name; the handle is closed right away so the
        # file can be re-opened for writing by the ``wave`` module.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            tmp_path = f.name

        try:
            self._synthesize_to_wav(voice, text, tmp_path, syn_config)
            # NOTE(review): ``join`` presumably makes playback blocking, so
            # the file is not removed while it is still being played - confirm
            # against ``_playback``, which is defined outside this class.
            player_args["join"] = True
            self._playback(tmp_path, **player_args)
        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    @action
    def download_voice(self, voice: str, models_dir: Optional[str] = None):
        """
        Download a Piper voice model.

        :param voice: Name of the voice to download (e.g.
            ``en_US-lessac-medium``).
        :param models_dir: Directory to store the downloaded voice model
            (default: the configured ``models_dir``).
        :raises RuntimeError: If the download command exits with a non-zero
            status.
        """
        import subprocess
        import sys

        models_dir = os.path.expanduser(models_dir or self._models_dir)
        pathlib.Path(models_dir).mkdir(parents=True, exist_ok=True)

        # Run the downloader with the current interpreter so that it uses the
        # same environment where piper is installed.
        cmd = [
            sys.executable,
            '-m',
            'piper.download_voices',
            voice,
            '--data-dir',
            models_dir,
        ]

        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
        if result.returncode != 0:
            raise RuntimeError(f'Failed to download voice "{voice}": {result.stderr}')

        self.logger.info('Voice "%s" downloaded successfully', voice)
# vim:sw=4:ts=4:et: